From a6de351e47bd78e7d646a1289a9019553540a151 Mon Sep 17 00:00:00 2001 From: Toon Claes Date: Mon, 30 Jun 2025 20:49:23 +0200 Subject: [PATCH 1/3] last-modified: new subcommand to show when files were last modified MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Similar to git-blame(1), introduce a new subcommand git-last-modified(1). This command shows the most recent modification to paths in a tree. It does so by expanding the tree at a given commit, taking note of the current state of each path, and then walking backwards through history looking for commits where each path changed into its final object ID. Based-on-patch-by: Jeff King Improved-by: Ævar Arnfjörð Bjarmason Signed-off-by: Toon Claes --- .gitignore | 1 + Documentation/git-last-modified.adoc | 50 +++++ Documentation/meson.build | 1 + Makefile | 1 + builtin.h | 1 + builtin/last-modified.c | 280 +++++++++++++++++++++++++++ command-list.txt | 1 + git.c | 1 + meson.build | 1 + t/meson.build | 1 + t/t8020-last-modified.sh | 203 +++++++++++++++++++ 11 files changed, 541 insertions(+) create mode 100644 Documentation/git-last-modified.adoc create mode 100644 builtin/last-modified.c create mode 100755 t/t8020-last-modified.sh diff --git a/.gitignore b/.gitignore index 04c444404e4ba8..a36ee944433574 100644 --- a/.gitignore +++ b/.gitignore @@ -87,6 +87,7 @@ /git-init-db /git-interpret-trailers /git-instaweb +/git-last-modified /git-log /git-ls-files /git-ls-remote diff --git a/Documentation/git-last-modified.adoc b/Documentation/git-last-modified.adoc new file mode 100644 index 00000000000000..ba08df0186212b --- /dev/null +++ b/Documentation/git-last-modified.adoc @@ -0,0 +1,50 @@ +git-last-modified(1) +==================== + +NAME +---- +git-last-modified - EXPERIMENTAL: Show when files were last modified + + +SYNOPSIS +-------- +[synopsis] +git last-modified [--recursive] [--recursive-with-trees] [<revision-range>] [[--] <path>...] 
+ +DESCRIPTION +----------- + +Shows which commit last modified each of the relevant files and subdirectories. +A commit renaming a path, or changing its mode is also taken into account. + +THIS COMMAND IS EXPERIMENTAL. THE BEHAVIOR MAY CHANGE. + +OPTIONS +------- + +-r, --recursive:: + Recurse into subtrees. + +-t, --recursive-with-trees:: + Show tree entry itself as well as subtrees. Implies `-r`. + +<revision-range>:: + Only traverse commits in the specified revision range. When no + `<revision-range>` is specified, it defaults to `HEAD` (i.e. the whole + history leading to the current commit). For a complete list of ways to + spell `<revision-range>`, see the 'Specifying Ranges' section of + linkgit:gitrevisions[7]. + +[--] <path>...:: + For each _<path>_ given, the commit which last modified it is returned. + Without an optional path parameter, all files and subdirectories + in the path traversal are included in the output. + +SEE ALSO +-------- +linkgit:git-blame[1], +linkgit:git-log[1]. + +GIT +--- +Part of the linkgit:git[1] suite diff --git a/Documentation/meson.build b/Documentation/meson.build index 4404c623f006db..a8ac5285f0abed 100644 --- a/Documentation/meson.build +++ b/Documentation/meson.build @@ -74,6 +74,7 @@ manpages = { 'git-init.adoc' : 1, 'git-instaweb.adoc' : 1, 'git-interpret-trailers.adoc' : 1, + 'git-last-modified.adoc' : 1, 'git-log.adoc' : 1, 'git-ls-files.adoc' : 1, 'git-ls-remote.adoc' : 1, diff --git a/Makefile b/Makefile index 5f7dd79dfa6ecf..b5ce55a70320dd 100644 --- a/Makefile +++ b/Makefile @@ -1265,6 +1265,7 @@ BUILTIN_OBJS += builtin/hook.o BUILTIN_OBJS += builtin/index-pack.o BUILTIN_OBJS += builtin/init-db.o BUILTIN_OBJS += builtin/interpret-trailers.o +BUILTIN_OBJS += builtin/last-modified.o BUILTIN_OBJS += builtin/log.o BUILTIN_OBJS += builtin/ls-files.o BUILTIN_OBJS += builtin/ls-remote.o diff --git a/builtin.h b/builtin.h index bff13e3069b4af..6ed6759ec4e037 100644 --- a/builtin.h +++ b/builtin.h @@ -176,6 +176,7 @@ int cmd_hook(int argc, const char **argv, const char *prefix, 
struct repository int cmd_index_pack(int argc, const char **argv, const char *prefix, struct repository *repo); int cmd_init_db(int argc, const char **argv, const char *prefix, struct repository *repo); int cmd_interpret_trailers(int argc, const char **argv, const char *prefix, struct repository *repo); +int cmd_last_modified(int argc, const char **argv, const char *prefix, struct repository *repo); int cmd_log_reflog(int argc, const char **argv, const char *prefix, struct repository *repo); int cmd_log(int argc, const char **argv, const char *prefix, struct repository *repo); int cmd_ls_files(int argc, const char **argv, const char *prefix, struct repository *repo); diff --git a/builtin/last-modified.c b/builtin/last-modified.c new file mode 100644 index 00000000000000..5701dc3b46754c --- /dev/null +++ b/builtin/last-modified.c @@ -0,0 +1,280 @@ +#include "git-compat-util.h" +#include "builtin.h" +#include "commit.h" +#include "config.h" +#include "diff.h" +#include "diffcore.h" +#include "hashmap.h" +#include "hex.h" +#include "log-tree.h" +#include "object-name.h" +#include "object.h" +#include "parse-options.h" +#include "quote.h" +#include "repository.h" +#include "revision.h" + +struct last_modified_entry { + struct hashmap_entry hashent; + struct object_id oid; + const char path[FLEX_ARRAY]; +}; + +static int last_modified_entry_hashcmp(const void *unused UNUSED, + const struct hashmap_entry *hent1, + const struct hashmap_entry *hent2, + const void *path) +{ + const struct last_modified_entry *ent1 = + container_of(hent1, const struct last_modified_entry, hashent); + const struct last_modified_entry *ent2 = + container_of(hent2, const struct last_modified_entry, hashent); + return strcmp(ent1->path, path ? 
path : ent2->path); +} + +struct last_modified { + struct hashmap paths; + struct rev_info rev; + bool recursive; + bool tree_in_recursive; +}; + +static void last_modified_release(struct last_modified *lm) +{ + hashmap_clear_and_free(&lm->paths, struct last_modified_entry, hashent); + release_revisions(&lm->rev); +} + +struct last_modified_callback_data { + struct last_modified *lm; + struct commit *commit; +}; + +static void add_path_from_diff(struct diff_queue_struct *q, + struct diff_options *opt UNUSED, void *data) +{ + struct last_modified *lm = data; + + for (int i = 0; i < q->nr; i++) { + struct diff_filepair *p = q->queue[i]; + struct last_modified_entry *ent; + const char *path = p->two->path; + + FLEX_ALLOC_STR(ent, path, path); + oidcpy(&ent->oid, &p->two->oid); + hashmap_entry_init(&ent->hashent, strhash(ent->path)); + hashmap_add(&lm->paths, &ent->hashent); + } +} + +static int populate_paths_from_revs(struct last_modified *lm) +{ + int num_interesting = 0; + struct diff_options diffopt; + + /* + * Create a copy of `struct diff_options`. In this copy a callback is + * set that when called adds entries to `paths` in `struct last_modified`. + * This copy is used to diff the tree of the target revision against an + * empty tree. This results in all paths in the target revision being + * listed. After `paths` is populated, we don't need this copy no more. 
+ */ + memcpy(&diffopt, &lm->rev.diffopt, sizeof(diffopt)); + copy_pathspec(&diffopt.pathspec, &lm->rev.diffopt.pathspec); + diffopt.output_format = DIFF_FORMAT_CALLBACK; + diffopt.format_callback = add_path_from_diff; + diffopt.format_callback_data = lm; + + for (size_t i = 0; i < lm->rev.pending.nr; i++) { + struct object_array_entry *obj = lm->rev.pending.objects + i; + + if (obj->item->flags & UNINTERESTING) + continue; + + if (num_interesting++) + return error(_("last-modified can only operate on one tree at a time")); + + diff_tree_oid(lm->rev.repo->hash_algo->empty_tree, + &obj->item->oid, "", &diffopt); + diff_flush(&diffopt); + } + clear_pathspec(&diffopt.pathspec); + + return 0; +} + +static void last_modified_emit(struct last_modified *lm, + const char *path, const struct commit *commit) + +{ + if (commit->object.flags & BOUNDARY) + putchar('^'); + printf("%s\t", oid_to_hex(&commit->object.oid)); + + if (lm->rev.diffopt.line_termination) + write_name_quoted(path, stdout, '\n'); + else + printf("%s%c", path, '\0'); +} + +static void mark_path(const char *path, const struct object_id *oid, + struct last_modified_callback_data *data) +{ + struct last_modified_entry *ent; + + /* Is it even a path that we are interested in? */ + ent = hashmap_get_entry_from_hash(&data->lm->paths, strhash(path), path, + struct last_modified_entry, hashent); + if (!ent) + return; + + /* + * Is it arriving at a version of interest, or is it from a side branch + * which did not contribute to the final state? 
+ */ + if (!oideq(oid, &ent->oid)) + return; + + last_modified_emit(data->lm, path, data->commit); + + hashmap_remove(&data->lm->paths, &ent->hashent, path); + free(ent); +} + +static void last_modified_diff(struct diff_queue_struct *q, + struct diff_options *opt UNUSED, void *cbdata) +{ + struct last_modified_callback_data *data = cbdata; + + for (int i = 0; i < q->nr; i++) { + struct diff_filepair *p = q->queue[i]; + switch (p->status) { + case DIFF_STATUS_DELETED: + /* + * There's no point in feeding a deletion, as it could + * not have resulted in our current state, which + * actually has the file. + */ + break; + + default: + /* + * Otherwise, we care only that we somehow arrived at + * a final oid state. Note that this covers some + * potentially controversial areas, including: + * + * 1. A rename or copy will be found, as it is the + * first time the content has arrived at the given + * path. + * + * 2. Even a non-content modification like a mode or + * type change will trigger it. + * + * We take the inclusive approach for now, and find + * anything which impacts the path. Options to tweak + * the behavior (e.g., to "--follow" the content across + * renames) can come later. 
+ */ + mark_path(p->two->path, &p->two->oid, data); + break; + } + } +} + +static int last_modified_run(struct last_modified *lm) +{ + struct last_modified_callback_data data = { .lm = lm }; + + lm->rev.diffopt.output_format = DIFF_FORMAT_CALLBACK; + lm->rev.diffopt.format_callback = last_modified_diff; + lm->rev.diffopt.format_callback_data = &data; + + prepare_revision_walk(&lm->rev); + + while (hashmap_get_size(&lm->paths)) { + data.commit = get_revision(&lm->rev); + if (!data.commit) + BUG("paths remaining beyond boundary in last-modified"); + + if (data.commit->object.flags & BOUNDARY) { + diff_tree_oid(lm->rev.repo->hash_algo->empty_tree, + &data.commit->object.oid, "", + &lm->rev.diffopt); + diff_flush(&lm->rev.diffopt); + } else { + log_tree_commit(&lm->rev, data.commit); + } + } + + return 0; +} + +static int last_modified_init(struct last_modified *lm, struct repository *r, + const char *prefix, int argc, const char **argv) +{ + hashmap_init(&lm->paths, last_modified_entry_hashcmp, NULL, 0); + + repo_init_revisions(r, &lm->rev, prefix); + lm->rev.def = "HEAD"; + lm->rev.combine_merges = 1; + lm->rev.show_root_diff = 1; + lm->rev.boundary = 1; + lm->rev.no_commit_id = 1; + lm->rev.diff = 1; + lm->rev.diffopt.flags.recursive = lm->recursive || lm->tree_in_recursive; + lm->rev.diffopt.flags.tree_in_recursive = lm->tree_in_recursive; + + argc = setup_revisions(argc, argv, &lm->rev, NULL); + if (argc > 1) { + error(_("unknown last-modified argument: %s"), argv[1]); + return argc; + } + + if (populate_paths_from_revs(lm) < 0) + return error(_("unable to setup last-modified")); + + return 0; +} + +int cmd_last_modified(int argc, const char **argv, const char *prefix, + struct repository *repo) +{ + int ret; + struct last_modified lm = { 0 }; + + const char * const last_modified_usage[] = { + N_("git last-modified [--recursive] [--recursive-with-trees] " + "[] [[--] ...]"), + NULL + }; + + struct option last_modified_options[] = { + OPT_BOOL('r', "recursive", 
&lm.recursive, + N_("recurse into subtrees")), + OPT_BOOL('t', "recursive-with-trees", &lm.tree_in_recursive, + N_("recurse into subtrees and include the tree entries too")), + OPT_END() + }; + + argc = parse_options(argc, argv, prefix, last_modified_options, + last_modified_usage, + PARSE_OPT_KEEP_ARGV0 | PARSE_OPT_KEEP_UNKNOWN_OPT); + + repo_config(repo, git_default_config, NULL); + + ret = last_modified_init(&lm, repo, prefix, argc, argv); + if (ret > 0) + usage_with_options(last_modified_usage, + last_modified_options); + if (ret) + goto out; + + ret = last_modified_run(&lm); + if (ret) + goto out; + +out: + last_modified_release(&lm); + + return ret; +} diff --git a/command-list.txt b/command-list.txt index b7ade3ab9f3319..b715777b248a0d 100644 --- a/command-list.txt +++ b/command-list.txt @@ -124,6 +124,7 @@ git-index-pack plumbingmanipulators git-init mainporcelain init git-instaweb ancillaryinterrogators complete git-interpret-trailers purehelpers +git-last-modified plumbinginterrogators git-log mainporcelain info git-ls-files plumbinginterrogators git-ls-remote plumbinginterrogators diff --git a/git.c b/git.c index 07a5fe39fb69f0..76a0b2a1a44d39 100644 --- a/git.c +++ b/git.c @@ -565,6 +565,7 @@ static struct cmd_struct commands[] = { { "init", cmd_init_db }, { "init-db", cmd_init_db }, { "interpret-trailers", cmd_interpret_trailers, RUN_SETUP_GENTLY }, + { "last-modified", cmd_last_modified, RUN_SETUP }, { "log", cmd_log, RUN_SETUP }, { "ls-files", cmd_ls_files, RUN_SETUP }, { "ls-remote", cmd_ls_remote, RUN_SETUP_GENTLY }, diff --git a/meson.build b/meson.build index 9bc1826cb69e9b..77a3416b1c2384 100644 --- a/meson.build +++ b/meson.build @@ -607,6 +607,7 @@ builtin_sources = [ 'builtin/index-pack.c', 'builtin/init-db.c', 'builtin/interpret-trailers.c', + 'builtin/last-modified.c', 'builtin/log.c', 'builtin/ls-files.c', 'builtin/ls-remote.c', diff --git a/t/meson.build b/t/meson.build index 660d780dcc62d6..904455e3ab7fe1 100644 --- a/t/meson.build +++ 
b/t/meson.build @@ -961,6 +961,7 @@ integration_tests = [ 't8012-blame-colors.sh', 't8013-blame-ignore-revs.sh', 't8014-blame-ignore-fuzzy.sh', + 't8020-last-modified.sh', 't9001-send-email.sh', 't9002-column.sh', 't9003-help-autocorrect.sh', diff --git a/t/t8020-last-modified.sh b/t/t8020-last-modified.sh new file mode 100755 index 00000000000000..05c113a1f8dc4b --- /dev/null +++ b/t/t8020-last-modified.sh @@ -0,0 +1,203 @@ +#!/bin/sh + +test_description='last-modified tests' + +. ./test-lib.sh + +test_expect_success 'setup' ' + test_commit 1 file && + mkdir a && + test_commit 2 a/file && + mkdir a/b && + test_commit 3 a/b/file +' + +test_expect_success 'cannot run last-modified on two trees' ' + test_must_fail git last-modified HEAD HEAD~1 +' + +check_last_modified() { + local indir= && + while test $# != 0 + do + case "$1" in + -C) + indir="$2" + shift + ;; + *) + break + ;; + esac && + shift + done && + + cat >expect && + test_when_finished "rm -f tmp.*" && + git ${indir:+-C "$indir"} last-modified "$@" >tmp.1 && + git name-rev --annotate-stdin --name-only --tags \ + tmp.2 && + tr '\t' ' ' actual && + test_cmp expect actual +} + +test_expect_success 'last-modified non-recursive' ' + check_last_modified <<-\EOF + 3 a + 1 file + EOF +' + +test_expect_success 'last-modified recursive' ' + check_last_modified -r <<-\EOF + 3 a/b/file + 2 a/file + 1 file + EOF +' + +test_expect_success 'last-modified recursive with tree' ' + check_last_modified -t <<-\EOF + 3 a + 3 a/b + 3 a/b/file + 2 a/file + 1 file + EOF +' + +test_expect_success 'last-modified subdir' ' + check_last_modified a <<-\EOF + 3 a + EOF +' + +test_expect_success 'last-modified subdir recursive' ' + check_last_modified -r a <<-\EOF + 3 a/b/file + 2 a/file + EOF +' + +test_expect_success 'last-modified from non-HEAD commit' ' + check_last_modified HEAD^ <<-\EOF + 2 a + 1 file + EOF +' + +test_expect_success 'last-modified from subdir defaults to root' ' + check_last_modified -C a <<-\EOF + 3 a + 1 file + 
EOF +' + +test_expect_success 'last-modified from subdir uses relative pathspecs' ' + check_last_modified -C a -r b <<-\EOF + 3 a/b/file + EOF +' + +test_expect_success 'limit last-modified traversal by count' ' + check_last_modified -1 <<-\EOF + 3 a + ^2 file + EOF +' + +test_expect_success 'limit last-modified traversal by commit' ' + check_last_modified HEAD~2..HEAD <<-\EOF + 3 a + ^1 file + EOF +' + +test_expect_success 'only last-modified files in the current tree' ' + git rm -rf a && + git commit -m "remove a" && + check_last_modified <<-\EOF + 1 file + EOF +' + +test_expect_success 'cross merge boundaries in blaming' ' + git checkout HEAD^0 && + git rm -rf . && + test_commit m1 && + git checkout HEAD^ && + git rm -rf . && + test_commit m2 && + git merge m1 && + check_last_modified <<-\EOF + m2 m2.t + m1 m1.t + EOF +' + +test_expect_success 'last-modified merge for resolved conflicts' ' + git checkout HEAD^0 && + git rm -rf . && + test_commit c1 conflict && + git checkout HEAD^ && + git rm -rf . && + test_commit c2 conflict && + test_must_fail git merge c1 && + test_commit resolved conflict && + check_last_modified conflict <<-\EOF + resolved conflict + EOF +' + + +# Consider `file` with this content through history: +# +# A---B---B-------B---B +# \ / +# C---D +test_expect_success 'last-modified merge ignores content from branch' ' + git checkout HEAD^0 && + git rm -rf . && + test_commit a1 file A && + test_commit a2 file B && + test_commit a3 file C && + test_commit a4 file D && + git checkout a2 && + git merge --no-commit --no-ff a4 && + git checkout a2 -- file && + git merge --continue && + check_last_modified <<-\EOF + a2 file + EOF +' + +# Consider `file` with this content through history: +# +# A---B---B---C---D---B---B +# \ / +# B-------B +test_expect_success 'last-modified merge undoes changes' ' + git checkout HEAD^0 && + git rm -rf . 
&& + test_commit b1 file A && + test_commit b2 file B && + test_commit b3 file C && + test_commit b4 file D && + git checkout b2 && + test_commit b5 file2 2 && + git checkout b4 && + git merge --no-commit --no-ff b5 && + git checkout b2 -- file && + git merge --continue && + check_last_modified <<-\EOF + b5 file2 + b2 file + EOF +' + +test_expect_success 'last-modified complains about unknown arguments' ' + test_must_fail git last-modified --foo 2>err && + grep "unknown last-modified argument: --foo" err +' + +test_done From 12cb770fe3b61660dd0adbf62b5f055b439fad57 Mon Sep 17 00:00:00 2001 From: Toon Claes Date: Mon, 30 Jun 2025 20:49:24 +0200 Subject: [PATCH 2/3] t/perf: add last-modified perf script This just runs some simple last-modified commands. We already test correctness in the regular suite, so this is just about finding performance regressions from one version to another. Based-on-patch-by: Jeff King Signed-off-by: Toon Claes --- t/meson.build | 1 + t/perf/p8020-last-modified.sh | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+) create mode 100755 t/perf/p8020-last-modified.sh diff --git a/t/meson.build b/t/meson.build index 904455e3ab7fe1..b74125b0479394 100644 --- a/t/meson.build +++ b/t/meson.build @@ -1155,6 +1155,7 @@ benchmarks = [ 'perf/p7820-grep-engines.sh', 'perf/p7821-grep-engines-fixed.sh', 'perf/p7822-grep-perl-character.sh', + 'perf/p8020-last-modified.sh', 'perf/p9210-scalar.sh', 'perf/p9300-fast-import-export.sh', ] diff --git a/t/perf/p8020-last-modified.sh b/t/perf/p8020-last-modified.sh new file mode 100755 index 00000000000000..cb1f98d3db9f4e --- /dev/null +++ b/t/perf/p8020-last-modified.sh @@ -0,0 +1,22 @@ +#!/bin/sh + +test_description='last-modified perf tests' +. 
./perf-lib.sh + +test_perf_default_repo + +test_perf 'top-level last-modified' ' + git last-modified HEAD +' + +test_perf 'top-level recursive last-modified' ' + git last-modified -r HEAD +' + +test_perf 'subdir last-modified' ' + git ls-tree -d HEAD >subtrees && + path="$(head -n 1 subtrees | cut -f2)" && + git last-modified -r HEAD -- "$path" +' + +test_done From bb498a3009161e978947d02512b0ba665c11cb12 Mon Sep 17 00:00:00 2001 From: Toon Claes Date: Mon, 30 Jun 2025 20:49:25 +0200 Subject: [PATCH 3/3] last-modified: use Bloom filters when available Our 'git last-modified' performs a revision walk, and computes a diff at each point in the walk to figure out whether a given revision changed any of the paths it considers interesting. When changed-path Bloom filters are available, we can avoid computing many such diffs. Before computing a diff, we first check if any of the remaining paths of interest were possibly changed at a given commit by consulting its Bloom filter. If any of them are, we are resigned to compute the diff. If none of those queries returned "maybe", we know that the given commit doesn't contain any changed paths which are interesting to us. So, we can avoid computing it in this case. 
Comparing the perf test results on git.git: Test HEAD~ HEAD ------------------------------------------------------------------------------------ 8020.1: top-level last-modified 4.49(4.34+0.11) 2.22(2.05+0.09) -50.6% 8020.2: top-level recursive last-modified 5.64(5.45+0.11) 5.62(5.30+0.11) -0.4% 8020.3: subdir last-modified 0.11(0.06+0.04) 0.07(0.03+0.04) -36.4% Based-on-patch-by: Taylor Blau Signed-off-by: Toon Claes --- builtin/last-modified.c | 48 ++++++++++++++++++++++++++++++++++++++-- commit-graph.c | 7 +++++- t/t8020-last-modified.sh | 2 ++ 3 files changed, 54 insertions(+), 3 deletions(-) diff --git a/builtin/last-modified.c b/builtin/last-modified.c index 5701dc3b46754c..7517b47398b693 100644 --- a/builtin/last-modified.c +++ b/builtin/last-modified.c @@ -1,5 +1,7 @@ #include "git-compat-util.h" +#include "bloom.h" #include "builtin.h" +#include "commit-graph.h" #include "commit.h" #include "config.h" #include "diff.h" @@ -17,6 +19,7 @@ struct last_modified_entry { struct hashmap_entry hashent; struct object_id oid; + struct bloom_key key; const char path[FLEX_ARRAY]; }; @@ -41,6 +44,12 @@ struct last_modified { static void last_modified_release(struct last_modified *lm) { + struct hashmap_iter iter; + struct last_modified_entry *ent; + + hashmap_for_each_entry(&lm->paths, &iter, ent, hashent) + bloom_key_clear(&ent->key); + hashmap_clear_and_free(&lm->paths, struct last_modified_entry, hashent); release_revisions(&lm->rev); } @@ -62,6 +71,9 @@ static void add_path_from_diff(struct diff_queue_struct *q, FLEX_ALLOC_STR(ent, path, path); oidcpy(&ent->oid, &p->two->oid); + if (lm->rev.bloom_filter_settings) + bloom_key_fill(&ent->key, path, strlen(path), + lm->rev.bloom_filter_settings); hashmap_entry_init(&ent->hashent, strhash(ent->path)); hashmap_add(&lm->paths, &ent->hashent); } @@ -138,6 +150,7 @@ static void mark_path(const char *path, const struct object_id *oid, last_modified_emit(data->lm, path, data->commit); hashmap_remove(&data->lm->paths, 
&ent->hashent, path); + bloom_key_clear(&ent->key); free(ent); } @@ -181,6 +194,27 @@ static void last_modified_diff(struct diff_queue_struct *q, } } +static bool maybe_changed_path(struct last_modified *lm, struct commit *origin) +{ + struct bloom_filter *filter; + struct last_modified_entry *ent; + struct hashmap_iter iter; + + if (!lm->rev.bloom_filter_settings) + return true; + + filter = get_bloom_filter(lm->rev.repo, origin); + if (!filter) + return true; + + hashmap_for_each_entry(&lm->paths, &iter, ent, hashent) { + if (bloom_filter_contains(filter, &ent->key, + lm->rev.bloom_filter_settings)) + return true; + } + return false; +} + static int last_modified_run(struct last_modified *lm) { struct last_modified_callback_data data = { .lm = lm }; @@ -196,14 +230,22 @@ static int last_modified_run(struct last_modified *lm) if (!data.commit) BUG("paths remaining beyond boundary in last-modified"); + // TODO distinguish when boundary is the one touching paths and + // beyond it + if (data.commit->object.flags & BOUNDARY) { diff_tree_oid(lm->rev.repo->hash_algo->empty_tree, &data.commit->object.oid, "", &lm->rev.diffopt); diff_flush(&lm->rev.diffopt); - } else { - log_tree_commit(&lm->rev, data.commit); + + //break; } + + if (!maybe_changed_path(lm, data.commit)) + continue; + + log_tree_commit(&lm->rev, data.commit); } return 0; @@ -230,6 +272,8 @@ static int last_modified_init(struct last_modified *lm, struct repository *r, return argc; } + lm->rev.bloom_filter_settings = get_bloom_filter_settings(lm->rev.repo); + if (populate_paths_from_revs(lm) < 0) return error(_("unable to setup last-modified")); diff --git a/commit-graph.c b/commit-graph.c index bd7b6f5338bd9d..dc1f29dd2f34a3 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -820,7 +820,12 @@ int corrected_commit_dates_enabled(struct repository *r) struct bloom_filter_settings *get_bloom_filter_settings(struct repository *r) { - struct commit_graph *g = r->objects->commit_graph; + struct commit_graph *g; 
+ + if (!prepare_commit_graph(r)) + return NULL; + + g = r->objects->commit_graph; while (g) { if (g->bloom_filter_settings) return g->bloom_filter_settings; diff --git a/t/t8020-last-modified.sh b/t/t8020-last-modified.sh index 05c113a1f8dc4b..db63a57cad926d 100755 --- a/t/t8020-last-modified.sh +++ b/t/t8020-last-modified.sh @@ -113,6 +113,8 @@ test_expect_success 'limit last-modified traversal by commit' ' EOF ' +# TODO test exact at boundary + test_expect_success 'only last-modified files in the current tree' ' git rm -rf a && git commit -m "remove a" &&