Skip to content

Commit 7562a72

Browse files
To1ne authored and gitster committed
last-modified: use Bloom filters when available
Our 'git last-modified' performs a revision walk, and computes a diff at each point in the walk to figure out whether a given revision changed any of the paths it considers interesting.

When changed-path Bloom filters are available, we can avoid computing many such diffs. Before computing a diff, we first check if any of the remaining paths of interest were possibly changed at a given commit by consulting its Bloom filter. If any of them are, we are resigned to compute the diff.

If none of those queries returned "maybe", we know that the given commit doesn't contain any changed paths which are interesting to us. So, we can avoid computing it in this case.

Comparing the perf test results on git.git:

Test                                        HEAD~             HEAD
------------------------------------------------------------------------------------
8020.1: top-level last-modified             4.49(4.34+0.11)   2.22(2.05+0.09) -50.6%
8020.2: top-level recursive last-modified   5.64(5.45+0.11)   5.62(5.30+0.11) -0.4%
8020.3: subdir last-modified                0.11(0.06+0.04)   0.07(0.03+0.04) -36.4%

Based-on-patch-by: Taylor Blau <[email protected]>
Signed-off-by: Toon Claes <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent da0f0ed commit 7562a72

File tree

1 file changed

+40
-0
lines changed

1 file changed

+40
-0
lines changed

builtin/last-modified.c

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
#include "git-compat-util.h"
2+
#include "bloom.h"
23
#include "builtin.h"
4+
#include "commit-graph.h"
35
#include "commit.h"
46
#include "config.h"
57
#include "diff.h"
@@ -17,6 +19,7 @@
1719
/*
 * One path that is still being tracked, stored as an entry of the
 * `paths` hashmap of struct last_modified.
 */
struct last_modified_entry {
1820
/* Hashmap linkage; initialized from strhash(path) when the entry is added. */
struct hashmap_entry hashent;
1921
/* Object ID copied from the diff filepair's "two" side when the entry is created. */
struct object_id oid;
22+
/*
 * Bloom key for `path`. Only filled when rev.bloom_filter_settings is
 * set; released with bloom_key_clear() before the entry is freed.
 */
struct bloom_key key;
2023
/* The path itself, allocated inline with the entry via FLEX_ALLOC_STR(). */
const char path[FLEX_ARRAY];
2124
};
2225

@@ -41,6 +44,12 @@ struct last_modified {
4144

4245
/*
 * Release the resources held by `lm`: the Bloom key of every remaining
 * path entry, the path entries themselves, and the revision walk state.
 */
static void last_modified_release(struct last_modified *lm)
4346
{
47+
struct hashmap_iter iter;
48+
struct last_modified_entry *ent;
49+
50+
/*
 * Clear each entry's Bloom key first: hashmap_clear_and_free() below
 * frees the entries, after which the keys could no longer be reached.
 */
hashmap_for_each_entry(&lm->paths, &iter, ent, hashent)
51+
bloom_key_clear(&ent->key);
52+
4453
hashmap_clear_and_free(&lm->paths, struct last_modified_entry, hashent);
4554
release_revisions(&lm->rev);
4655
}
@@ -62,6 +71,9 @@ static void add_path_from_diff(struct diff_queue_struct *q,
6271

6372
FLEX_ALLOC_STR(ent, path, path);
6473
oidcpy(&ent->oid, &p->two->oid);
74+
if (lm->rev.bloom_filter_settings)
75+
bloom_key_fill(&ent->key, path, strlen(path),
76+
lm->rev.bloom_filter_settings);
6577
hashmap_entry_init(&ent->hashent, strhash(ent->path));
6678
hashmap_add(&lm->paths, &ent->hashent);
6779
}
@@ -136,6 +148,7 @@ static void mark_path(const char *path, const struct object_id *oid,
136148
last_modified_emit(data->lm, path, data->commit);
137149

138150
hashmap_remove(&data->lm->paths, &ent->hashent, path);
151+
bloom_key_clear(&ent->key);
139152
free(ent);
140153
}
141154

@@ -179,6 +192,27 @@ static void last_modified_diff(struct diff_queue_struct *q,
179192
}
180193
}
181194

195+
/*
 * Consult the changed-path Bloom filter of `origin` to decide whether a
 * diff for that commit is worth computing.
 *
 * Returns 1 ("maybe") when no Bloom filter settings are available, when
 * no filter can be obtained for `origin`, or when the filter reports a
 * possible match for at least one path still present in lm->paths.
 * Returns 0 only when the filter rejects every remaining path.
 *
 * NOTE(review): the filter presumably describes changes relative to the
 * commit's parent. The visible caller invokes this before its BOUNDARY
 * branch, which diffs against the empty tree instead; confirm that
 * skipping a boundary commit on a 0 result is intended.
 */
static int maybe_changed_path(struct last_modified *lm, struct commit *origin)
196+
{
197+
struct bloom_filter *filter;
198+
struct last_modified_entry *ent;
199+
struct hashmap_iter iter;
200+
201+
/* Without filter settings nothing can be ruled out; make the caller diff. */
if (!lm->rev.bloom_filter_settings)
202+
return 1;
203+
204+
filter = get_bloom_filter(lm->rev.repo, origin);
205+
/* No filter for this commit: fall back to computing the diff. */
if (!filter)
206+
return 1;
207+
208+
/* A single possible match among the remaining paths is enough to require the diff. */
hashmap_for_each_entry(&lm->paths, &iter, ent, hashent) {
209+
if (bloom_filter_contains(filter, &ent->key,
210+
lm->rev.bloom_filter_settings))
211+
return 1;
212+
}
213+
return 0;
214+
}
215+
182216
static int last_modified_run(struct last_modified *lm)
183217
{
184218
struct last_modified_callback_data data = { .lm = lm };
@@ -194,6 +228,9 @@ static int last_modified_run(struct last_modified *lm)
194228
if (!data.commit)
195229
break;
196230

231+
if (!maybe_changed_path(lm, data.commit))
232+
continue;
233+
197234
if (data.commit->object.flags & BOUNDARY) {
198235
diff_tree_oid(lm->rev.repo->hash_algo->empty_tree,
199236
&data.commit->object.oid, "",
@@ -227,6 +264,9 @@ static int last_modified_init(struct last_modified *lm, struct repository *r,
227264
return argc;
228265
}
229266

267+
prepare_commit_graph(lm->rev.repo);
268+
lm->rev.bloom_filter_settings = get_bloom_filter_settings(lm->rev.repo);
269+
230270
if (populate_paths_from_revs(lm) < 0)
231271
return error(_("unable to setup last-modified"));
232272

0 commit comments

Comments
 (0)