Skip to content

Commit 1217c03

Browse files
garimasi514gitster
authored andcommitted
commit-graph: reuse existing Bloom filters during write
Add logic to a) parse Bloom filter information from the commit graph file and b) re-use existing Bloom filters. See Documentation/technical/commit-graph-format for the format in which the Bloom filter information is written to the commit graph file. To read the Bloom filter for a given commit with lexicographic position 'i' we need to: 1. Read BIDX[i], which gives us the starting index in BDAT for the filter of commit i+1. It is essentially the index past the end of the filter of commit i. It is called end_index in the code. 2. For i>0, read BIDX[i-1], which will give us the starting index in BDAT for the filter of commit i. It is called start_index in the code. For the first commit, where i = 0, Bloom filter data starts at the beginning, just past the header in the BDAT chunk. Hence, start_index will be 0. 3. The length of the filter will be end_index - start_index, because BIDX[i] gives the cumulative number of 8-bit words including the ith commit's filter. We toggle whether Bloom filters should be recomputed based on the compute_if_not_present flag. Helped-by: Derrick Stolee <[email protected]> Signed-off-by: Garima Singh <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 76ffbca commit 1217c03

File tree

4 files changed

+55
-6
lines changed

4 files changed

+55
-6
lines changed

bloom.c

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
#include "diffcore.h"
55
#include "revision.h"
66
#include "hashmap.h"
7+
#include "commit-graph.h"
8+
#include "commit.h"
79

810
define_commit_slab(bloom_filter_slab, struct bloom_filter);
911

@@ -26,6 +28,36 @@ static inline unsigned char get_bitmask(uint32_t pos)
2628
return ((unsigned char)1) << (pos & (BITS_PER_WORD - 1));
2729
}
2830

31+
static int load_bloom_filter_from_graph(struct commit_graph *g,
32+
struct bloom_filter *filter,
33+
struct commit *c)
34+
{
35+
uint32_t lex_pos, start_index, end_index;
36+
37+
while (c->graph_pos < g->num_commits_in_base)
38+
g = g->base_graph;
39+
40+
/* The commit graph commit 'c' lives in doesn't carry bloom filters. */
41+
if (!g->chunk_bloom_indexes)
42+
return 0;
43+
44+
lex_pos = c->graph_pos - g->num_commits_in_base;
45+
46+
end_index = get_be32(g->chunk_bloom_indexes + 4 * lex_pos);
47+
48+
if (lex_pos > 0)
49+
start_index = get_be32(g->chunk_bloom_indexes + 4 * (lex_pos - 1));
50+
else
51+
start_index = 0;
52+
53+
filter->len = end_index - start_index;
54+
filter->data = (unsigned char *)(g->chunk_bloom_data +
55+
sizeof(unsigned char) * start_index +
56+
BLOOMDATA_CHUNK_HEADER_SIZE);
57+
58+
return 1;
59+
}
60+
2961
/*
3062
* Calculate the murmur3 32-bit hash value for the given data
3163
* using the given seed.
@@ -127,7 +159,8 @@ void init_bloom_filters(void)
127159
}
128160

129161
struct bloom_filter *get_bloom_filter(struct repository *r,
130-
struct commit *c)
162+
struct commit *c,
163+
int compute_if_not_present)
131164
{
132165
struct bloom_filter *filter;
133166
struct bloom_filter_settings settings = DEFAULT_BLOOM_FILTER_SETTINGS;
@@ -140,6 +173,20 @@ struct bloom_filter *get_bloom_filter(struct repository *r,
140173

141174
filter = bloom_filter_slab_at(&bloom_filters, c);
142175

176+
if (!filter->data) {
177+
load_commit_graph_info(r, c);
178+
if (c->graph_pos != COMMIT_NOT_FROM_GRAPH &&
179+
r->objects->commit_graph->chunk_bloom_indexes) {
180+
if (load_bloom_filter_from_graph(r->objects->commit_graph, filter, c))
181+
return filter;
182+
else
183+
return NULL;
184+
}
185+
}
186+
187+
if (filter->data || !compute_if_not_present)
188+
return filter;
189+
143190
repo_diff_setup(r, &diffopt);
144191
diffopt.flags.recursive = 1;
145192
diffopt.max_changes = max_changes;

bloom.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ struct bloom_filter_settings {
3232

3333
#define DEFAULT_BLOOM_FILTER_SETTINGS { 1, 7, 10 }
3434
#define BITS_PER_WORD 8
35+
/*
 * Size in bytes of the header at the start of the BDAT chunk; the
 * Bloom filter data begins immediately after it. The replacement list
 * is parenthesized so the macro behaves as a single value in any
 * expression (e.g. as a divisor or after a unary operator).
 */
#define BLOOMDATA_CHUNK_HEADER_SIZE (3 * sizeof(uint32_t))
3536

3637
/*
3738
* A bloom_filter struct represents a data segment to
@@ -79,6 +80,7 @@ void add_key_to_filter(const struct bloom_key *key,
7980
void init_bloom_filters(void);
8081

8182
struct bloom_filter *get_bloom_filter(struct repository *r,
82-
struct commit *c);
83+
struct commit *c,
84+
int compute_if_not_present);
8385

8486
#endif

commit-graph.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1086,7 +1086,7 @@ static void write_graph_chunk_bloom_indexes(struct hashfile *f,
10861086
ctx->commits.nr);
10871087

10881088
while (list < last) {
1089-
struct bloom_filter *filter = get_bloom_filter(ctx->r, *list);
1089+
struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0);
10901090
cur_pos += filter->len;
10911091
display_progress(progress, ++i);
10921092
hashwrite_be32(f, cur_pos);
@@ -1115,7 +1115,7 @@ static void write_graph_chunk_bloom_data(struct hashfile *f,
11151115
hashwrite_be32(f, settings->bits_per_entry);
11161116

11171117
while (list < last) {
1118-
struct bloom_filter *filter = get_bloom_filter(ctx->r, *list);
1118+
struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0);
11191119
display_progress(progress, ++i);
11201120
hashwrite(f, filter->data, filter->len * sizeof(unsigned char));
11211121
list++;
@@ -1296,7 +1296,7 @@ static void compute_bloom_filters(struct write_commit_graph_context *ctx)
12961296

12971297
for (i = 0; i < ctx->commits.nr; i++) {
12981298
struct commit *c = sorted_commits[i];
1299-
struct bloom_filter *filter = get_bloom_filter(ctx->r, c);
1299+
struct bloom_filter *filter = get_bloom_filter(ctx->r, c, 1);
13001300
ctx->total_bloom_filter_data_size += sizeof(unsigned char) * filter->len;
13011301
display_progress(progress, i + 1);
13021302
}

t/helper/test-bloom.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ static void get_bloom_filter_for_commit(const struct object_id *commit_oid)
3939
struct bloom_filter *filter;
4040
setup_git_directory();
4141
c = lookup_commit(the_repository, commit_oid);
42-
filter = get_bloom_filter(the_repository, c);
42+
filter = get_bloom_filter(the_repository, c, 1);
4343
print_bloom_filter(filter);
4444
}
4545

0 commit comments

Comments
 (0)