Skip to content

Commit 899b5ed

Browse files
To1ne authored and gitster committed
last-modified: use Bloom filters when available
Our 'git last-modified' performs a revision walk, and computes a diff at each point in the walk to figure out whether a given revision changed any of the paths it considers interesting.

When changed-path Bloom filters are available, we can avoid computing many such diffs. Before computing a diff, we first check if any of the remaining paths of interest were possibly changed at a given commit by consulting its Bloom filter. If any of them are, we are resigned to compute the diff.

If none of those queries returned "maybe", we know that the given commit doesn't contain any changed paths which are interesting to us. So, we can avoid computing it in this case.

Comparing the perf test results on git.git:

    Test                                        HEAD~             HEAD
    ------------------------------------------------------------------------------------
    8020.1: top-level last-modified             4.49(4.34+0.11)   2.22(2.05+0.09) -50.6%
    8020.2: top-level recursive last-modified   5.64(5.45+0.11)   5.62(5.30+0.11) -0.4%
    8020.3: subdir last-modified                0.11(0.06+0.04)   0.07(0.03+0.04) -36.4%

Based-on-patch-by: Taylor Blau <[email protected]>
Signed-off-by: Toon Claes <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent 3cb6875 commit 899b5ed

File tree

2 files changed

+52
-3
lines changed

2 files changed

+52
-3
lines changed

builtin/last-modified.c

Lines changed: 46 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,7 @@
11
#include "git-compat-util.h"
2+
#include "bloom.h"
23
#include "builtin.h"
4+
#include "commit-graph.h"
35
#include "commit.h"
46
#include "config.h"
57
#include "diff.h"
@@ -18,6 +20,7 @@
1820
struct last_modified_entry {
1921
struct hashmap_entry hashent;
2022
struct object_id oid;
23+
struct bloom_key key;
2124
const char path[FLEX_ARRAY];
2225
};
2326

@@ -42,6 +45,12 @@ struct last_modified {
4245

4346
static void last_modified_release(struct last_modified *lm)
4447
{
48+
struct hashmap_iter iter;
49+
struct last_modified_entry *ent;
50+
51+
hashmap_for_each_entry(&lm->paths, &iter, ent, hashent)
52+
bloom_key_clear(&ent->key);
53+
4554
hashmap_clear_and_free(&lm->paths, struct last_modified_entry, hashent);
4655
release_revisions(&lm->rev);
4756
}
@@ -63,6 +72,9 @@ static void add_path_from_diff(struct diff_queue_struct *q,
6372

6473
FLEX_ALLOC_STR(ent, path, path);
6574
oidcpy(&ent->oid, &p->two->oid);
75+
if (lm->rev.bloom_filter_settings)
76+
bloom_key_fill(&ent->key, path, strlen(path),
77+
lm->rev.bloom_filter_settings);
6678
hashmap_entry_init(&ent->hashent, strhash(ent->path));
6779
hashmap_add(&lm->paths, &ent->hashent);
6880
}
@@ -139,6 +151,7 @@ static void mark_path(const char *path, const struct object_id *oid,
139151
last_modified_emit(data->lm, path, data->commit);
140152

141153
hashmap_remove(&data->lm->paths, &ent->hashent, path);
154+
bloom_key_clear(&ent->key);
142155
free(ent);
143156
}
144157

@@ -182,6 +195,30 @@ static void last_modified_diff(struct diff_queue_struct *q,
182195
}
183196
}
184197

198+
static bool maybe_changed_path(struct last_modified *lm, struct commit *origin)
199+
{
200+
struct bloom_filter *filter;
201+
struct last_modified_entry *ent;
202+
struct hashmap_iter iter;
203+
204+
if (!lm->rev.bloom_filter_settings)
205+
return true;
206+
207+
if (commit_graph_generation(origin) == GENERATION_NUMBER_INFINITY)
208+
return true;
209+
210+
filter = get_bloom_filter(lm->rev.repo, origin);
211+
if (!filter)
212+
return true;
213+
214+
hashmap_for_each_entry(&lm->paths, &iter, ent, hashent) {
215+
if (bloom_filter_contains(filter, &ent->key,
216+
lm->rev.bloom_filter_settings))
217+
return true;
218+
}
219+
return false;
220+
}
221+
185222
static int last_modified_run(struct last_modified *lm)
186223
{
187224
struct last_modified_callback_data data = { .lm = lm };
@@ -202,9 +239,14 @@ static int last_modified_run(struct last_modified *lm)
202239
&data.commit->object.oid, "",
203240
&lm->rev.diffopt);
204241
diff_flush(&lm->rev.diffopt);
205-
} else {
206-
log_tree_commit(&lm->rev, data.commit);
242+
243+
break;
207244
}
245+
246+
if (!maybe_changed_path(lm, data.commit))
247+
continue;
248+
249+
log_tree_commit(&lm->rev, data.commit);
208250
}
209251

210252
return 0;
@@ -231,6 +273,8 @@ static int last_modified_init(struct last_modified *lm, struct repository *r,
231273
return argc;
232274
}
233275

276+
lm->rev.bloom_filter_settings = get_bloom_filter_settings(lm->rev.repo);
277+
234278
if (populate_paths_from_revs(lm) < 0)
235279
return error(_("unable to setup last-modified"));
236280

commit-graph.c

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -820,7 +820,12 @@ int corrected_commit_dates_enabled(struct repository *r)
820820

821821
struct bloom_filter_settings *get_bloom_filter_settings(struct repository *r)
822822
{
823-
struct commit_graph *g = r->objects->commit_graph;
823+
struct commit_graph *g;
824+
825+
if (!prepare_commit_graph(r))
826+
return NULL;
827+
828+
g = r->objects->commit_graph;
824829
while (g) {
825830
if (g->bloom_filter_settings)
826831
return g->bloom_filter_settings;

0 commit comments

Comments
 (0)