Skip to content

Commit 9491974

Browse files
derrickstoleegitster
authored andcommitted
bloom: fix logic in get_bloom_filter()
The get_bloom_filter() method is a bit complicated in some parts where it does not need to be. In particular, it needs to return a NULL filter only when compute_if_not_present is zero AND the filter data cannot be loaded from a commit-graph file. This currently happens by accident because the commit-graph does not load changed-path Bloom filters from an existing commit-graph when writing a new one. This will change in a later patch. Also clean up some style issues while we are here. One side-effect of returning a NULL filter is that the filters that are reported as "too large" will now be reported as NULL insead of length zero. This case was not properly covered before, so add a test. Further, remote the counting of the zero-length filters from revision.c and the trace2 logs. Helped-by: René Scharfe <[email protected]> Helped-by: SZEDER Gábor <[email protected]> Signed-off-by: Derrick Stolee <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 7b671f8 commit 9491974

File tree

4 files changed

+34
-19
lines changed

4 files changed

+34
-19
lines changed

bloom.c

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -186,24 +186,22 @@ struct bloom_filter *get_bloom_filter(struct repository *r,
186186
struct diff_options diffopt;
187187
int max_changes = 512;
188188

189-
if (bloom_filters.slab_size == 0)
189+
if (!bloom_filters.slab_size)
190190
return NULL;
191191

192192
filter = bloom_filter_slab_at(&bloom_filters, c);
193193

194194
if (!filter->data) {
195195
load_commit_graph_info(r, c);
196196
if (c->graph_pos != COMMIT_NOT_FROM_GRAPH &&
197-
r->objects->commit_graph->chunk_bloom_indexes) {
198-
if (load_bloom_filter_from_graph(r->objects->commit_graph, filter, c))
199-
return filter;
200-
else
201-
return NULL;
202-
}
197+
r->objects->commit_graph->chunk_bloom_indexes)
198+
load_bloom_filter_from_graph(r->objects->commit_graph, filter, c);
203199
}
204200

205-
if (filter->data || !compute_if_not_present)
201+
if (filter->data)
206202
return filter;
203+
if (!compute_if_not_present)
204+
return NULL;
207205

208206
repo_diff_setup(r, &diffopt);
209207
diffopt.flags.recursive = 1;

commit-graph.c

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1098,7 +1098,8 @@ static void write_graph_chunk_bloom_indexes(struct hashfile *f,
10981098

10991099
while (list < last) {
11001100
struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0);
1101-
cur_pos += filter->len;
1101+
size_t len = filter ? filter->len : 0;
1102+
cur_pos += len;
11021103
display_progress(progress, ++i);
11031104
hashwrite_be32(f, cur_pos);
11041105
list++;
@@ -1126,8 +1127,11 @@ static void write_graph_chunk_bloom_data(struct hashfile *f,
11261127

11271128
while (list < last) {
11281129
struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0);
1130+
size_t len = filter ? filter->len : 0;
11291131
display_progress(progress, ++i);
1130-
hashwrite(f, filter->data, filter->len * sizeof(unsigned char));
1132+
1133+
if (len)
1134+
hashwrite(f, filter->data, len * sizeof(unsigned char));
11311135
list++;
11321136
}
11331137

revision.c

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -633,15 +633,13 @@ static unsigned int count_bloom_filter_maybe;
633633
static unsigned int count_bloom_filter_definitely_not;
634634
static unsigned int count_bloom_filter_false_positive;
635635
static unsigned int count_bloom_filter_not_present;
636-
static unsigned int count_bloom_filter_length_zero;
637636

638637
static void trace2_bloom_filter_statistics_atexit(void)
639638
{
640639
struct json_writer jw = JSON_WRITER_INIT;
641640

642641
jw_object_begin(&jw, 0);
643642
jw_object_intmax(&jw, "filter_not_present", count_bloom_filter_not_present);
644-
jw_object_intmax(&jw, "zero_length_filter", count_bloom_filter_length_zero);
645643
jw_object_intmax(&jw, "maybe", count_bloom_filter_maybe);
646644
jw_object_intmax(&jw, "definitely_not", count_bloom_filter_definitely_not);
647645
jw_object_intmax(&jw, "false_positive", count_bloom_filter_false_positive);
@@ -735,11 +733,6 @@ static int check_maybe_different_in_bloom_filter(struct rev_info *revs,
735733
return -1;
736734
}
737735

738-
if (!filter->len) {
739-
count_bloom_filter_length_zero++;
740-
return -1;
741-
}
742-
743736
result = bloom_filter_contains(filter,
744737
revs->bloom_key,
745738
revs->bloom_filter_settings);

t/t4216-log-bloom.sh

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ setup () {
6060

6161
test_bloom_filters_used () {
6262
log_args=$1
63-
bloom_trace_prefix="statistics:{\"filter_not_present\":0,\"zero_length_filter\":0,\"maybe\""
63+
bloom_trace_prefix="statistics:{\"filter_not_present\":0,\"maybe\""
6464
setup "$log_args" &&
6565
grep -q "$bloom_trace_prefix" "$TRASH_DIRECTORY/trace.perf" &&
6666
test_cmp log_wo_bloom log_w_bloom &&
@@ -142,7 +142,7 @@ test_expect_success 'setup - add commit-graph to the chain with Bloom filters' '
142142

143143
test_bloom_filters_used_when_some_filters_are_missing () {
144144
log_args=$1
145-
bloom_trace_prefix="statistics:{\"filter_not_present\":3,\"zero_length_filter\":0,\"maybe\":8,\"definitely_not\":6"
145+
bloom_trace_prefix="statistics:{\"filter_not_present\":3,\"maybe\":8,\"definitely_not\":6"
146146
setup "$log_args" &&
147147
grep -q "$bloom_trace_prefix" "$TRASH_DIRECTORY/trace.perf" &&
148148
test_cmp log_wo_bloom log_w_bloom
@@ -152,4 +152,24 @@ test_expect_success 'Use Bloom filters if they exist in the latest but not all c
152152
test_bloom_filters_used_when_some_filters_are_missing "-- A/B"
153153
'
154154

155+
test_expect_success 'correctly report changes over limit' '
156+
git init 513changes &&
157+
(
158+
cd 513changes &&
159+
for i in $(test_seq 1 513)
160+
do
161+
echo $i >file$i.txt || return 1
162+
done &&
163+
git add . &&
164+
git commit -m "files" &&
165+
git commit-graph write --reachable --changed-paths &&
166+
for i in $(test_seq 1 513)
167+
do
168+
git -c core.commitGraph=false log -- file$i.txt >expect &&
169+
git log -- file$i.txt >actual &&
170+
test_cmp expect actual || return 1
171+
done
172+
)
173+
'
174+
155175
test_done

0 commit comments

Comments
 (0)