Skip to content

Commit 90d5518

Browse files
brandb97gitster
authored andcommitted
bloom: replace struct bloom_key * with struct bloom_keyvec
Previously, we stored bloom keys in a flat array and marked a commit as NOT TREESAME if any key reported "definitely not changed". To support multiple pathspec items, we now require that for each pathspec item, there exists a bloom key reporting "definitely not changed". This "for every" condition makes a flat array insufficient, so we introduce a new structure to group keys by a single pathspec item. `struct bloom_keyvec` is introduced to replace `struct bloom_key *` and `bloom_keys_nr`. And because we want to support multiple pathspec items, we added a `bloom_keyvec **` field (`bloom_keyvecs`) and a `bloom_keyvecs_nr` field to `struct rev_info` to represent an array of bloom_keyvecs. This commit still optimizes only one pathspec item, thus `bloom_keyvecs_nr` can only be 0 or 1. New bloom_keyvec_* functions are added to create and destroy a keyvec. bloom_filter_contains_vec() is added to check whether all keys in a keyvec are contained in a bloom filter. Signed-off-by: Lidong Yan <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent b187353 commit 90d5518

File tree

4 files changed

+132
-49
lines changed

4 files changed

+132
-49
lines changed

bloom.c

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,55 @@ void deinit_bloom_filters(void)
278278
deep_clear_bloom_filter_slab(&bloom_filters, free_one_bloom_filter);
279279
}
280280

281+
struct bloom_keyvec *bloom_keyvec_new(const char *path, size_t len,
282+
const struct bloom_filter_settings *settings)
283+
{
284+
struct bloom_keyvec *vec;
285+
const char *p;
286+
size_t sz;
287+
size_t nr = 1;
288+
289+
p = path;
290+
while (*p) {
291+
/*
292+
* At this point, the path is normalized to use Unix-style
293+
* path separators. This is required due to how the
294+
* changed-path Bloom filters store the paths.
295+
*/
296+
if (*p == '/')
297+
nr++;
298+
p++;
299+
}
300+
301+
sz = sizeof(struct bloom_keyvec);
302+
sz += nr * sizeof(struct bloom_key);
303+
vec = (struct bloom_keyvec *)xcalloc(1, sz);
304+
if (!vec)
305+
return NULL;
306+
vec->count = nr;
307+
308+
bloom_key_fill(&vec->key[0], path, len, settings);
309+
nr = 1;
310+
p = path + len - 1;
311+
while (p > path) {
312+
if (*p == '/') {
313+
bloom_key_fill(&vec->key[nr++], path, p - path, settings);
314+
}
315+
p--;
316+
}
317+
assert(nr == vec->count);
318+
return vec;
319+
}
320+
321+
void bloom_keyvec_free(struct bloom_keyvec *vec)
322+
{
323+
if (!vec)
324+
return;
325+
for (size_t nr = 0; nr < vec->count; nr++)
326+
bloom_key_clear(&vec->key[nr]);
327+
free(vec);
328+
}
329+
281330
static int pathmap_cmp(const void *hashmap_cmp_fn_data UNUSED,
282331
const struct hashmap_entry *eptr,
283332
const struct hashmap_entry *entry_or_key,
@@ -539,6 +588,18 @@ int bloom_filter_contains(const struct bloom_filter *filter,
539588
return 1;
540589
}
541590

591+
int bloom_filter_contains_vec(const struct bloom_filter *filter,
592+
const struct bloom_keyvec *vec,
593+
const struct bloom_filter_settings *settings)
594+
{
595+
int ret = 1;
596+
597+
for (size_t nr = 0; ret > 0 && nr < vec->count; nr++)
598+
ret = bloom_filter_contains(filter, &vec->key[nr], settings);
599+
600+
return ret;
601+
}
602+
542603
uint32_t test_bloom_murmur3_seeded(uint32_t seed, const char *data, size_t len,
543604
int version)
544605
{

bloom.h

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,16 @@ struct bloom_key {
7474
uint32_t *hashes;
7575
};
7676

77+
/*
78+
* A bloom_keyvec is a vector of bloom_keys, which
79+
* can be used to store multiple keys for a single
80+
* pathspec item.
81+
*/
82+
struct bloom_keyvec {
83+
size_t count; /* number of entries in key[] */
84+
struct bloom_key key[FLEX_ARRAY]; /* the keys, stored inline after count */
85+
};
86+
7787
int load_bloom_filter_from_graph(struct commit_graph *g,
7888
struct bloom_filter *filter,
7989
uint32_t graph_pos);
@@ -82,6 +92,23 @@ void bloom_key_fill(struct bloom_key *key, const char *data, size_t len,
8292
const struct bloom_filter_settings *settings);
8393
void bloom_key_clear(struct bloom_key *key);
8494

95+
/*
96+
* bloom_keyvec_new - Allocate and populate a bloom_keyvec with keys for the
97+
* given path.
98+
*
99+
* This function splits the input path by '/' and generates a bloom key for each
100+
* prefix, ordered from most to least specific. For example, given the input
101+
* "a/b/c", it will generate bloom keys for:
102+
* - "a/b/c"
103+
* - "a/b"
104+
* - "a"
105+
*
106+
* The resulting keys are stored in a newly allocated bloom_keyvec.
107+
*/
108+
struct bloom_keyvec *bloom_keyvec_new(const char *path, size_t len,
109+
const struct bloom_filter_settings *settings);
110+
void bloom_keyvec_free(struct bloom_keyvec *vec);
111+
85112
void add_key_to_filter(const struct bloom_key *key,
86113
struct bloom_filter *filter,
87114
const struct bloom_filter_settings *settings);
@@ -126,6 +153,17 @@ int bloom_filter_contains(const struct bloom_filter *filter,
126153
const struct bloom_key *key,
127154
const struct bloom_filter_settings *settings);
128155

156+
/*
157+
* bloom_filter_contains_vec - Check if all keys in a key vector are in the
158+
* Bloom filter.
159+
*
160+
* Returns 1 if **all** keys in the vector are present in the filter; otherwise
161+
* returns the first non-positive bloom_filter_contains() result (0 if **any** key is not present).
162+
*/
163+
int bloom_filter_contains_vec(const struct bloom_filter *filter,
164+
const struct bloom_keyvec *v,
165+
const struct bloom_filter_settings *settings);
166+
129167
uint32_t test_bloom_murmur3_seeded(uint32_t seed, const char *data, size_t len,
130168
int version);
131169

revision.c

Lines changed: 30 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -685,13 +685,14 @@ static int forbid_bloom_filters(struct pathspec *spec)
685685
return 0;
686686
}
687687

688+
static void release_revisions_bloom_keyvecs(struct rev_info *revs);
689+
688690
static void prepare_to_use_bloom_filter(struct rev_info *revs)
689691
{
690692
struct pathspec_item *pi;
691693
char *path_alloc = NULL;
692-
const char *path, *p;
694+
const char *path;
693695
size_t len;
694-
int path_component_nr = 1;
695696

696697
if (!revs->commits)
697698
return;
@@ -708,6 +709,8 @@ static void prepare_to_use_bloom_filter(struct rev_info *revs)
708709
if (!revs->pruning.pathspec.nr)
709710
return;
710711

712+
revs->bloom_keyvecs_nr = 1;
713+
CALLOC_ARRAY(revs->bloom_keyvecs, 1);
711714
pi = &revs->pruning.pathspec.items[0];
712715

713716
/* remove single trailing slash from path, if needed */
@@ -718,53 +721,30 @@ static void prepare_to_use_bloom_filter(struct rev_info *revs)
718721
path = pi->match;
719722

720723
len = strlen(path);
721-
if (!len) {
722-
revs->bloom_filter_settings = NULL;
723-
free(path_alloc);
724-
return;
725-
}
726-
727-
p = path;
728-
while (*p) {
729-
/*
730-
* At this point, the path is normalized to use Unix-style
731-
* path separators. This is required due to how the
732-
* changed-path Bloom filters store the paths.
733-
*/
734-
if (*p == '/')
735-
path_component_nr++;
736-
p++;
737-
}
738-
739-
revs->bloom_keys_nr = path_component_nr;
740-
ALLOC_ARRAY(revs->bloom_keys, revs->bloom_keys_nr);
724+
if (!len)
725+
goto fail;
741726

742-
bloom_key_fill(&revs->bloom_keys[0], path, len,
743-
revs->bloom_filter_settings);
744-
path_component_nr = 1;
745-
746-
p = path + len - 1;
747-
while (p > path) {
748-
if (*p == '/')
749-
bloom_key_fill(&revs->bloom_keys[path_component_nr++],
750-
path, p - path,
751-
revs->bloom_filter_settings);
752-
p--;
753-
}
727+
revs->bloom_keyvecs[0] =
728+
bloom_keyvec_new(path, len, revs->bloom_filter_settings);
754729

755730
if (trace2_is_enabled() && !bloom_filter_atexit_registered) {
756731
atexit(trace2_bloom_filter_statistics_atexit);
757732
bloom_filter_atexit_registered = 1;
758733
}
759734

735+
return;
736+
737+
fail:
738+
revs->bloom_filter_settings = NULL;
760739
free(path_alloc);
740+
release_revisions_bloom_keyvecs(revs);
761741
}
762742

763743
static int check_maybe_different_in_bloom_filter(struct rev_info *revs,
764744
struct commit *commit)
765745
{
766746
struct bloom_filter *filter;
767-
int result = 1, j;
747+
int result = 0;
768748

769749
if (!revs->repo->objects->commit_graph)
770750
return -1;
@@ -779,10 +759,10 @@ static int check_maybe_different_in_bloom_filter(struct rev_info *revs,
779759
return -1;
780760
}
781761

782-
for (j = 0; result && j < revs->bloom_keys_nr; j++) {
783-
result = bloom_filter_contains(filter,
784-
&revs->bloom_keys[j],
785-
revs->bloom_filter_settings);
762+
for (size_t nr = 0; !result && nr < revs->bloom_keyvecs_nr; nr++) {
763+
result = bloom_filter_contains_vec(filter,
764+
revs->bloom_keyvecs[nr],
765+
revs->bloom_filter_settings);
786766
}
787767

788768
if (result)
@@ -823,7 +803,7 @@ static int rev_compare_tree(struct rev_info *revs,
823803
return REV_TREE_SAME;
824804
}
825805

826-
if (revs->bloom_keys_nr && !nth_parent) {
806+
if (revs->bloom_keyvecs_nr && !nth_parent) {
827807
bloom_ret = check_maybe_different_in_bloom_filter(revs, commit);
828808

829809
if (bloom_ret == 0)
@@ -850,7 +830,7 @@ static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit,
850830
if (!t1)
851831
return 0;
852832

853-
if (!nth_parent && revs->bloom_keys_nr) {
833+
if (!nth_parent && revs->bloom_keyvecs_nr) {
854834
bloom_ret = check_maybe_different_in_bloom_filter(revs, commit);
855835
if (!bloom_ret)
856836
return 1;
@@ -3200,6 +3180,14 @@ static void release_revisions_mailmap(struct string_list *mailmap)
32003180

32013181
static void release_revisions_topo_walk_info(struct topo_walk_info *info);
32023182

3183+
static void release_revisions_bloom_keyvecs(struct rev_info *revs)
3184+
{
3185+
for (size_t nr = 0; nr < revs->bloom_keyvecs_nr; nr++)
3186+
bloom_keyvec_free(revs->bloom_keyvecs[nr]);
3187+
FREE_AND_NULL(revs->bloom_keyvecs);
3188+
revs->bloom_keyvecs_nr = 0;
3189+
}
3190+
32033191
static void free_void_commit_list(void *list)
32043192
{
32053193
free_commit_list(list);
@@ -3228,11 +3216,7 @@ void release_revisions(struct rev_info *revs)
32283216
clear_decoration(&revs->treesame, free);
32293217
line_log_free(revs);
32303218
oidset_clear(&revs->missing_commits);
3231-
3232-
for (int i = 0; i < revs->bloom_keys_nr; i++)
3233-
bloom_key_clear(&revs->bloom_keys[i]);
3234-
FREE_AND_NULL(revs->bloom_keys);
3235-
revs->bloom_keys_nr = 0;
3219+
release_revisions_bloom_keyvecs(revs);
32363220
}
32373221

32383222
static void add_child(struct rev_info *revs, struct commit *parent, struct commit *child)

revision.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ struct repository;
6262
struct rev_info;
6363
struct string_list;
6464
struct saved_parents;
65-
struct bloom_key;
65+
struct bloom_keyvec;
6666
struct bloom_filter_settings;
6767
struct option;
6868
struct parse_opt_ctx_t;
@@ -360,8 +360,8 @@ struct rev_info {
360360

361361
/* Commit graph bloom filter fields */
362362
/* The bloom filter key(s) for the pathspec */
363-
struct bloom_key *bloom_keys;
364-
int bloom_keys_nr;
363+
struct bloom_keyvec **bloom_keyvecs;
364+
int bloom_keyvecs_nr;
365365

366366
/*
367367
* The bloom filter settings used to generate the key.

0 commit comments

Comments
 (0)