Skip to content

Commit f22d4ac

Browse files
committed
Merge branch 'ly/changed-paths-traversal'
Lift the limitation to use changed-path filter in "git log" so that it can be used for a pathspec with multiple literal paths.

* ly/changed-paths-traversal:
  - bloom: optimize multiple pathspec items in revision
  - revision: make helper for pathspec to bloom keyvec
  - bloom: replace struct bloom_key * with struct bloom_keyvec
  - bloom: rename function operates on bloom_key
  - bloom: add test helper to return murmur3 hash
2 parents 0e8243a + 2a6ce09 commit f22d4ac

File tree

8 files changed

+204
-100
lines changed

8 files changed

+204
-100
lines changed

blame.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1311,7 +1311,7 @@ static void add_bloom_key(struct blame_bloom_data *bd,
13111311
}
13121312

13131313
bd->keys[bd->nr] = xmalloc(sizeof(struct bloom_key));
1314-
fill_bloom_key(path, strlen(path), bd->keys[bd->nr], bd->settings);
1314+
bloom_key_fill(bd->keys[bd->nr], path, strlen(path), bd->settings);
13151315
bd->nr++;
13161316
}
13171317

bloom.c

Lines changed: 77 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ int load_bloom_filter_from_graph(struct commit_graph *g,
107107
* Not considered to be cryptographically secure.
108108
* Implemented as described in https://en.wikipedia.org/wiki/MurmurHash#Algorithm
109109
*/
110-
uint32_t murmur3_seeded_v2(uint32_t seed, const char *data, size_t len)
110+
static uint32_t murmur3_seeded_v2(uint32_t seed, const char *data, size_t len)
111111
{
112112
const uint32_t c1 = 0xcc9e2d51;
113113
const uint32_t c2 = 0x1b873593;
@@ -221,9 +221,7 @@ static uint32_t murmur3_seeded_v1(uint32_t seed, const char *data, size_t len)
221221
return seed;
222222
}
223223

224-
void fill_bloom_key(const char *data,
225-
size_t len,
226-
struct bloom_key *key,
224+
void bloom_key_fill(struct bloom_key *key, const char *data, size_t len,
227225
const struct bloom_filter_settings *settings)
228226
{
229227
int i;
@@ -243,7 +241,7 @@ void fill_bloom_key(const char *data,
243241
key->hashes[i] = hash0 + i * hash1;
244242
}
245243

246-
void clear_bloom_key(struct bloom_key *key)
244+
void bloom_key_clear(struct bloom_key *key)
247245
{
248246
FREE_AND_NULL(key->hashes);
249247
}
@@ -280,6 +278,55 @@ void deinit_bloom_filters(void)
280278
deep_clear_bloom_filter_slab(&bloom_filters, free_one_bloom_filter);
281279
}
282280

281+
struct bloom_keyvec *bloom_keyvec_new(const char *path, size_t len,
282+
const struct bloom_filter_settings *settings)
283+
{
284+
struct bloom_keyvec *vec;
285+
const char *p;
286+
size_t sz;
287+
size_t nr = 1;
288+
289+
p = path;
290+
while (*p) {
291+
/*
292+
* At this point, the path is normalized to use Unix-style
293+
* path separators. This is required due to how the
294+
* changed-path Bloom filters store the paths.
295+
*/
296+
if (*p == '/')
297+
nr++;
298+
p++;
299+
}
300+
301+
sz = sizeof(struct bloom_keyvec);
302+
sz += nr * sizeof(struct bloom_key);
303+
vec = (struct bloom_keyvec *)xcalloc(1, sz);
304+
if (!vec)
305+
return NULL;
306+
vec->count = nr;
307+
308+
bloom_key_fill(&vec->key[0], path, len, settings);
309+
nr = 1;
310+
p = path + len - 1;
311+
while (p > path) {
312+
if (*p == '/') {
313+
bloom_key_fill(&vec->key[nr++], path, p - path, settings);
314+
}
315+
p--;
316+
}
317+
assert(nr == vec->count);
318+
return vec;
319+
}
320+
321+
void bloom_keyvec_free(struct bloom_keyvec *vec)
322+
{
323+
if (!vec)
324+
return;
325+
for (size_t nr = 0; nr < vec->count; nr++)
326+
bloom_key_clear(&vec->key[nr]);
327+
free(vec);
328+
}
329+
283330
static int pathmap_cmp(const void *hashmap_cmp_fn_data UNUSED,
284331
const struct hashmap_entry *eptr,
285332
const struct hashmap_entry *entry_or_key,
@@ -500,9 +547,9 @@ struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
500547

501548
hashmap_for_each_entry(&pathmap, &iter, e, entry) {
502549
struct bloom_key key;
503-
fill_bloom_key(e->path, strlen(e->path), &key, settings);
550+
bloom_key_fill(&key, e->path, strlen(e->path), settings);
504551
add_key_to_filter(&key, filter, settings);
505-
clear_bloom_key(&key);
552+
bloom_key_clear(&key);
506553
}
507554

508555
cleanup:
@@ -540,3 +587,26 @@ int bloom_filter_contains(const struct bloom_filter *filter,
540587

541588
return 1;
542589
}
590+
591+
int bloom_filter_contains_vec(const struct bloom_filter *filter,
592+
const struct bloom_keyvec *vec,
593+
const struct bloom_filter_settings *settings)
594+
{
595+
int ret = 1;
596+
597+
for (size_t nr = 0; ret > 0 && nr < vec->count; nr++)
598+
ret = bloom_filter_contains(filter, &vec->key[nr], settings);
599+
600+
return ret;
601+
}
602+
603+
uint32_t test_bloom_murmur3_seeded(uint32_t seed, const char *data, size_t len,
604+
int version)
605+
{
606+
assert(version == 1 || version == 2);
607+
608+
if (version == 2)
609+
return murmur3_seeded_v2(seed, data, len);
610+
else
611+
return murmur3_seeded_v1(seed, data, len);
612+
}

bloom.h

Lines changed: 42 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -74,24 +74,40 @@ struct bloom_key {
7474
uint32_t *hashes;
7575
};
7676

77+
/*
78+
* A bloom_keyvec is a vector of bloom_keys, which
79+
* can be used to store multiple keys for a single
80+
* pathspec item.
81+
*/
82+
struct bloom_keyvec {
83+
size_t count;
84+
struct bloom_key key[FLEX_ARRAY];
85+
};
86+
7787
int load_bloom_filter_from_graph(struct commit_graph *g,
7888
struct bloom_filter *filter,
7989
uint32_t graph_pos);
8090

91+
void bloom_key_fill(struct bloom_key *key, const char *data, size_t len,
92+
const struct bloom_filter_settings *settings);
93+
void bloom_key_clear(struct bloom_key *key);
94+
8195
/*
82-
* Calculate the murmur3 32-bit hash value for the given data
83-
* using the given seed.
84-
* Produces a uniformly distributed hash value.
85-
* Not considered to be cryptographically secure.
86-
* Implemented as described in https://en.wikipedia.org/wiki/MurmurHash#Algorithm
96+
* bloom_keyvec_new - Allocate and populate a bloom_keyvec with keys for the
97+
* given path.
98+
*
99+
* This function splits the input path by '/' and generates a bloom key for each
100+
* prefix, ordered from most to least specific. For example, given the input
101+
* "a/b/c", it will generate bloom keys for:
102+
* - "a/b/c"
103+
* - "a/b"
104+
* - "a"
105+
*
106+
* The resulting keys are stored in a newly allocated bloom_keyvec.
87107
*/
88-
uint32_t murmur3_seeded_v2(uint32_t seed, const char *data, size_t len);
89-
90-
void fill_bloom_key(const char *data,
91-
size_t len,
92-
struct bloom_key *key,
93-
const struct bloom_filter_settings *settings);
94-
void clear_bloom_key(struct bloom_key *key);
108+
struct bloom_keyvec *bloom_keyvec_new(const char *path, size_t len,
109+
const struct bloom_filter_settings *settings);
110+
void bloom_keyvec_free(struct bloom_keyvec *vec);
95111

96112
void add_key_to_filter(const struct bloom_key *key,
97113
struct bloom_filter *filter,
@@ -137,4 +153,18 @@ int bloom_filter_contains(const struct bloom_filter *filter,
137153
const struct bloom_key *key,
138154
const struct bloom_filter_settings *settings);
139155

156+
/*
157+
* bloom_filter_contains_vec - Check if all keys in a key vector are in the
158+
* Bloom filter.
159+
*
160+
* Returns 1 if **all** keys in the vector may be present in the filter;
161+
* otherwise returns the first non-positive result of bloom_filter_contains() (0 if **any** key is not present).
162+
*/
163+
int bloom_filter_contains_vec(const struct bloom_filter *filter,
164+
const struct bloom_keyvec *v,
165+
const struct bloom_filter_settings *settings);
166+
167+
uint32_t test_bloom_murmur3_seeded(uint32_t seed, const char *data, size_t len,
168+
int version);
169+
140170
#endif

line-log.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1172,12 +1172,13 @@ static int bloom_filter_check(struct rev_info *rev,
11721172
return 0;
11731173

11741174
while (!result && range) {
1175-
fill_bloom_key(range->path, strlen(range->path), &key, rev->bloom_filter_settings);
1175+
bloom_key_fill(&key, range->path, strlen(range->path),
1176+
rev->bloom_filter_settings);
11761177

11771178
if (bloom_filter_contains(filter, &key, rev->bloom_filter_settings))
11781179
result = 1;
11791180

1180-
clear_bloom_key(&key);
1181+
bloom_key_clear(&key);
11811182
range = range->next;
11821183
}
11831184

0 commit comments

Comments
 (0)