66#include "commit-graph.h"
77#include "commit.h"
88#include "commit-slab.h"
9+ #include "tree.h"
10+ #include "tree-walk.h"
11+ #include "config.h"
912#include "repository.h"
1013
1114define_commit_slab (bloom_filter_slab , struct bloom_filter );
@@ -49,9 +52,9 @@ static int check_bloom_offset(struct commit_graph *g, uint32_t pos,
4952 return -1 ;
5053}
5154
52- static int load_bloom_filter_from_graph (struct commit_graph * g ,
53- struct bloom_filter * filter ,
54- uint32_t graph_pos )
55+ int load_bloom_filter_from_graph (struct commit_graph * g ,
56+ struct bloom_filter * filter ,
57+ uint32_t graph_pos )
5558{
5659 uint32_t lex_pos , start_index , end_index ;
5760
@@ -89,6 +92,8 @@ static int load_bloom_filter_from_graph(struct commit_graph *g,
8992 filter -> data = (unsigned char * )(g -> chunk_bloom_data +
9093 sizeof (unsigned char ) * start_index +
9194 BLOOMDATA_CHUNK_HEADER_SIZE );
95+ filter -> version = g -> bloom_filter_settings -> hash_version ;
96+ filter -> to_free = NULL ;
9297
9398 return 1 ;
9499}
@@ -100,7 +105,64 @@ static int load_bloom_filter_from_graph(struct commit_graph *g,
100105 * Not considered to be cryptographically secure.
101106 * Implemented as described in https://en.wikipedia.org/wiki/MurmurHash#Algorithm
102107 */
103- uint32_t murmur3_seeded (uint32_t seed , const char * data , size_t len )
108+ uint32_t murmur3_seeded_v2 (uint32_t seed , const char * data , size_t len )
109+ {
110+ const uint32_t c1 = 0xcc9e2d51 ;
111+ const uint32_t c2 = 0x1b873593 ;
112+ const uint32_t r1 = 15 ;
113+ const uint32_t r2 = 13 ;
114+ const uint32_t m = 5 ;
115+ const uint32_t n = 0xe6546b64 ;
116+ int i ;
117+ uint32_t k1 = 0 ;
118+ const char * tail ;
119+
120+ int len4 = len / sizeof (uint32_t );
121+
122+ uint32_t k ;
123+ for (i = 0 ; i < len4 ; i ++ ) {
124+ uint32_t byte1 = (uint32_t )(unsigned char )data [4 * i ];
125+ uint32_t byte2 = ((uint32_t )(unsigned char )data [4 * i + 1 ]) << 8 ;
126+ uint32_t byte3 = ((uint32_t )(unsigned char )data [4 * i + 2 ]) << 16 ;
127+ uint32_t byte4 = ((uint32_t )(unsigned char )data [4 * i + 3 ]) << 24 ;
128+ k = byte1 | byte2 | byte3 | byte4 ;
129+ k *= c1 ;
130+ k = rotate_left (k , r1 );
131+ k *= c2 ;
132+
133+ seed ^= k ;
134+ seed = rotate_left (seed , r2 ) * m + n ;
135+ }
136+
137+ tail = (data + len4 * sizeof (uint32_t ));
138+
139+ switch (len & (sizeof (uint32_t ) - 1 )) {
140+ case 3 :
141+ k1 ^= ((uint32_t )(unsigned char )tail [2 ]) << 16 ;
142+ /*-fallthrough*/
143+ case 2 :
144+ k1 ^= ((uint32_t )(unsigned char )tail [1 ]) << 8 ;
145+ /*-fallthrough*/
146+ case 1 :
147+ k1 ^= ((uint32_t )(unsigned char )tail [0 ]) << 0 ;
148+ k1 *= c1 ;
149+ k1 = rotate_left (k1 , r1 );
150+ k1 *= c2 ;
151+ seed ^= k1 ;
152+ break ;
153+ }
154+
155+ seed ^= (uint32_t )len ;
156+ seed ^= (seed >> 16 );
157+ seed *= 0x85ebca6b ;
158+ seed ^= (seed >> 13 );
159+ seed *= 0xc2b2ae35 ;
160+ seed ^= (seed >> 16 );
161+
162+ return seed ;
163+ }
164+
165+ static uint32_t murmur3_seeded_v1 (uint32_t seed , const char * data , size_t len )
104166{
105167 const uint32_t c1 = 0xcc9e2d51 ;
106168 const uint32_t c2 = 0x1b873593 ;
@@ -165,8 +227,14 @@ void fill_bloom_key(const char *data,
165227 int i ;
166228 const uint32_t seed0 = 0x293ae76f ;
167229 const uint32_t seed1 = 0x7e646e2c ;
168- const uint32_t hash0 = murmur3_seeded (seed0 , data , len );
169- const uint32_t hash1 = murmur3_seeded (seed1 , data , len );
230+ uint32_t hash0 , hash1 ;
231+ if (settings -> hash_version == 2 ) {
232+ hash0 = murmur3_seeded_v2 (seed0 , data , len );
233+ hash1 = murmur3_seeded_v2 (seed1 , data , len );
234+ } else {
235+ hash0 = murmur3_seeded_v1 (seed0 , data , len );
236+ hash1 = murmur3_seeded_v1 (seed1 , data , len );
237+ }
170238
171239 key -> hashes = (uint32_t * )xcalloc (settings -> num_hashes , sizeof (uint32_t ));
172240 for (i = 0 ; i < settings -> num_hashes ; i ++ )
@@ -198,6 +266,18 @@ void init_bloom_filters(void)
198266 init_bloom_filter_slab (& bloom_filters );
199267}
200268
269+ static void free_one_bloom_filter (struct bloom_filter * filter )
270+ {
271+ if (!filter )
272+ return ;
273+ free (filter -> to_free );
274+ }
275+
276+ void deinit_bloom_filters (void )
277+ {
278+ deep_clear_bloom_filter_slab (& bloom_filters , free_one_bloom_filter );
279+ }
280+
201281static int pathmap_cmp (const void * hashmap_cmp_fn_data UNUSED ,
202282 const struct hashmap_entry * eptr ,
203283 const struct hashmap_entry * entry_or_key ,
@@ -211,11 +291,97 @@ static int pathmap_cmp(const void *hashmap_cmp_fn_data UNUSED,
211291 return strcmp (e1 -> path , e2 -> path );
212292}
213293
214- static void init_truncated_large_filter (struct bloom_filter * filter )
294+ static void init_truncated_large_filter (struct bloom_filter * filter ,
295+ int version )
215296{
216- filter -> data = xmalloc (1 );
297+ filter -> data = filter -> to_free = xmalloc (1 );
217298 filter -> data [0 ] = 0xFF ;
218299 filter -> len = 1 ;
300+ filter -> version = version ;
301+ }
302+
303+ #define VISITED (1u<<21)
304+ #define HIGH_BITS (1u<<22)
305+
306+ static int has_entries_with_high_bit (struct repository * r , struct tree * t )
307+ {
308+ if (parse_tree (t ))
309+ return 1 ;
310+
311+ if (!(t -> object .flags & VISITED )) {
312+ struct tree_desc desc ;
313+ struct name_entry entry ;
314+
315+ init_tree_desc (& desc , & t -> object .oid , t -> buffer , t -> size );
316+ while (tree_entry (& desc , & entry )) {
317+ size_t i ;
318+ for (i = 0 ; i < entry .pathlen ; i ++ ) {
319+ if (entry .path [i ] & 0x80 ) {
320+ t -> object .flags |= HIGH_BITS ;
321+ goto done ;
322+ }
323+ }
324+
325+ if (S_ISDIR (entry .mode )) {
326+ struct tree * sub = lookup_tree (r , & entry .oid );
327+ if (sub && has_entries_with_high_bit (r , sub )) {
328+ t -> object .flags |= HIGH_BITS ;
329+ goto done ;
330+ }
331+ }
332+
333+ }
334+
335+ done :
336+ t -> object .flags |= VISITED ;
337+ }
338+
339+ return !!(t -> object .flags & HIGH_BITS );
340+ }
341+
342+ static int commit_tree_has_high_bit_paths (struct repository * r ,
343+ struct commit * c )
344+ {
345+ struct tree * t ;
346+ if (repo_parse_commit (r , c ))
347+ return 1 ;
348+ t = repo_get_commit_tree (r , c );
349+ if (!t )
350+ return 1 ;
351+ return has_entries_with_high_bit (r , t );
352+ }
353+
354+ static struct bloom_filter * upgrade_filter (struct repository * r , struct commit * c ,
355+ struct bloom_filter * filter ,
356+ int hash_version )
357+ {
358+ struct commit_list * p = c -> parents ;
359+ if (commit_tree_has_high_bit_paths (r , c ))
360+ return NULL ;
361+
362+ if (p && commit_tree_has_high_bit_paths (r , p -> item ))
363+ return NULL ;
364+
365+ filter -> version = hash_version ;
366+
367+ return filter ;
368+ }
369+
370+ struct bloom_filter * get_bloom_filter (struct repository * r , struct commit * c )
371+ {
372+ struct bloom_filter * filter ;
373+ int hash_version ;
374+
375+ filter = get_or_compute_bloom_filter (r , c , 0 , NULL , NULL );
376+ if (!filter )
377+ return NULL ;
378+
379+ prepare_repo_settings (r );
380+ hash_version = r -> settings .commit_graph_changed_paths_version ;
381+
382+ if (!(hash_version == -1 || hash_version == filter -> version ))
383+ return NULL ; /* unusable filter */
384+ return filter ;
219385}
220386
221387struct bloom_filter * get_or_compute_bloom_filter (struct repository * r ,
@@ -243,8 +409,23 @@ struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
243409 filter , graph_pos );
244410 }
245411
246- if (filter -> data && filter -> len )
247- return filter ;
412+ if (filter -> data && filter -> len ) {
413+ struct bloom_filter * upgrade ;
414+ if (!settings || settings -> hash_version == filter -> version )
415+ return filter ;
416+
417+ /* version mismatch, see if we can upgrade */
418+ if (compute_if_not_present &&
419+ git_env_bool ("GIT_TEST_UPGRADE_BLOOM_FILTERS" , 1 )) {
420+ upgrade = upgrade_filter (r , c , filter ,
421+ settings -> hash_version );
422+ if (upgrade ) {
423+ if (computed )
424+ * computed |= BLOOM_UPGRADED ;
425+ return upgrade ;
426+ }
427+ }
428+ }
248429 if (!compute_if_not_present )
249430 return NULL ;
250431
@@ -300,19 +481,22 @@ struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
300481 }
301482
302483 if (hashmap_get_size (& pathmap ) > settings -> max_changed_paths ) {
303- init_truncated_large_filter (filter );
484+ init_truncated_large_filter (filter ,
485+ settings -> hash_version );
304486 if (computed )
305487 * computed |= BLOOM_TRUNC_LARGE ;
306488 goto cleanup ;
307489 }
308490
309491 filter -> len = (hashmap_get_size (& pathmap ) * settings -> bits_per_entry + BITS_PER_WORD - 1 ) / BITS_PER_WORD ;
492+ filter -> version = settings -> hash_version ;
310493 if (!filter -> len ) {
311494 if (computed )
312495 * computed |= BLOOM_TRUNC_EMPTY ;
313496 filter -> len = 1 ;
314497 }
315498 CALLOC_ARRAY (filter -> data , filter -> len );
499+ filter -> to_free = filter -> data ;
316500
317501 hashmap_for_each_entry (& pathmap , & iter , e , entry ) {
318502 struct bloom_key key ;
@@ -326,7 +510,7 @@ struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
326510 } else {
327511 for (i = 0 ; i < diff_queued_diff .nr ; i ++ )
328512 diff_free_filepair (diff_queued_diff .queue [i ]);
329- init_truncated_large_filter (filter );
513+ init_truncated_large_filter (filter , settings -> hash_version );
330514
331515 if (computed )
332516 * computed |= BLOOM_TRUNC_LARGE ;
0 commit comments