Skip to content

Commit 15af58c

Browse files
stefanbeller authored and gitster committed
diffcore: add a pickaxe option to find a specific blob
Sometimes users are given a hash of an object and they want to identify it further (ex.: Use verify-pack to find the largest blobs, but what are these? or [1]). One might be tempted to extend git-describe to also work with blobs, such that `git describe <blob-id>` gives a description as '<commit-ish>:<path>'. This was implemented at [2]; as seen by the sheer number of responses (>110), it turns out this is tricky to get right. The hard part to get right is picking the correct 'commit-ish' as that could be the commit that (re-)introduced the blob or the commit that removed the blob; the blob could exist in different branches. Junio hinted at a different approach of solving this problem, which this patch implements. Teach the diff machinery another flag for restricting the information to what is shown. For example: $ ./git log --oneline --find-object=v2.0.0:Makefile b2feb64 Revert the whole "ask curl-config" topic for now 47fbfde i18n: only extract comments marked with "TRANSLATORS:" we observe that the Makefile as shipped with 2.0 appeared in v1.9.2-471-g47fbfded53 and in v2.0.0-rc1-5-gb2feb6430b. The reason why these commits both occur prior to v2.0.0 is evil merges that are not found using this new mechanism. [1] https://stackoverflow.com/questions/223678/which-commit-has-this-blob [2] https://public-inbox.org/git/[email protected]/ Signed-off-by: Stefan Beller <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent cf63051 commit 15af58c

File tree

6 files changed

+135
-20
lines changed

6 files changed

+135
-20
lines changed

Documentation/diff-options.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -492,6 +492,15 @@ occurrences of that string did not change).
492492
See the 'pickaxe' entry in linkgit:gitdiffcore[7] for more
493493
information.
494494

495+
--find-object=<object-id>::
496+
Look for differences that change the number of occurrences of
497+
the specified object. Similar to `-S`, just the argument is different
498+
in that it doesn't search for a specific string but for a specific
499+
object id.
500+
+
501+
The object can be a blob or a submodule commit. It implies the `-t` option in
502+
`git-log` to also find trees.
503+
495504
--pickaxe-all::
496505
When `-S` or `-G` finds a change, show all the changes in that
497506
changeset, not just the files that contain the change
@@ -500,6 +509,7 @@ information.
500509
--pickaxe-regex::
501510
Treat the <string> given to `-S` as an extended POSIX regular
502511
expression to match.
512+
503513
endif::git-format-patch[]
504514

505515
-O<orderfile>::

diff.c

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4082,6 +4082,7 @@ void diff_setup(struct diff_options *options)
40824082
options->interhunkcontext = diff_interhunk_context_default;
40834083
options->ws_error_highlight = ws_error_highlight_default;
40844084
options->flags.rename_empty = 1;
4085+
options->objfind = NULL;
40854086

40864087
/* pathchange left =NULL by default */
40874088
options->change = diff_change;
@@ -4487,6 +4488,23 @@ static int parse_ws_error_highlight_opt(struct diff_options *opt, const char *ar
44874488
return 1;
44884489
}
44894490

4491+
static int parse_objfind_opt(struct diff_options *opt, const char *arg)
4492+
{
4493+
struct object_id oid;
4494+
4495+
if (get_oid(arg, &oid))
4496+
return error("unable to resolve '%s'", arg);
4497+
4498+
if (!opt->objfind)
4499+
opt->objfind = xcalloc(1, sizeof(*opt->objfind));
4500+
4501+
opt->pickaxe_opts |= DIFF_PICKAXE_KIND_OBJFIND;
4502+
opt->flags.recursive = 1;
4503+
opt->flags.tree_in_recursive = 1;
4504+
oidset_insert(opt->objfind, &oid);
4505+
return 1;
4506+
}
4507+
44904508
int diff_opt_parse(struct diff_options *options,
44914509
const char **av, int ac, const char *prefix)
44924510
{
@@ -4736,7 +4754,8 @@ int diff_opt_parse(struct diff_options *options,
47364754
else if ((argcount = short_opt('O', av, &optarg))) {
47374755
options->orderfile = prefix_filename(prefix, optarg);
47384756
return argcount;
4739-
}
4757+
} else if (skip_prefix(arg, "--find-object=", &arg))
4758+
return parse_objfind_opt(options, arg);
47404759
else if ((argcount = parse_long_opt("diff-filter", av, &optarg))) {
47414760
int offending = parse_diff_filter_opt(optarg, options);
47424761
if (offending)

diff.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include "tree-walk.h"
88
#include "pathspec.h"
99
#include "object.h"
10+
#include "oidset.h"
1011

1112
struct rev_info;
1213
struct diff_options;
@@ -173,6 +174,8 @@ struct diff_options {
173174
enum diff_words_type word_diff;
174175
enum diff_submodule_format submodule_format;
175176

177+
struct oidset *objfind;
178+
176179
/* this is set by diffcore for DIFF_FORMAT_PATCH */
177180
int found_changes;
178181

@@ -325,8 +328,11 @@ extern void diff_setup_done(struct diff_options *);
325328

326329
#define DIFF_PICKAXE_KIND_S 4 /* traditional plumbing counter */
327330
#define DIFF_PICKAXE_KIND_G 8 /* grep in the patch */
331+
#define DIFF_PICKAXE_KIND_OBJFIND 16 /* specific object IDs */
328332

329-
#define DIFF_PICKAXE_KINDS_MASK (DIFF_PICKAXE_KIND_S | DIFF_PICKAXE_KIND_G)
333+
#define DIFF_PICKAXE_KINDS_MASK (DIFF_PICKAXE_KIND_S | \
334+
DIFF_PICKAXE_KIND_G | \
335+
DIFF_PICKAXE_KIND_OBJFIND)
330336

331337
#define DIFF_PICKAXE_IGNORE_CASE 32
332338

diffcore-pickaxe.c

Lines changed: 27 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -124,13 +124,20 @@ static int pickaxe_match(struct diff_filepair *p, struct diff_options *o,
124124
mmfile_t mf1, mf2;
125125
int ret;
126126

127-
if (!o->pickaxe[0])
128-
return 0;
129-
130127
/* ignore unmerged */
131128
if (!DIFF_FILE_VALID(p->one) && !DIFF_FILE_VALID(p->two))
132129
return 0;
133130

131+
if (o->objfind) {
132+
return (DIFF_FILE_VALID(p->one) &&
133+
oidset_contains(o->objfind, &p->one->oid)) ||
134+
(DIFF_FILE_VALID(p->two) &&
135+
oidset_contains(o->objfind, &p->two->oid));
136+
}
137+
138+
if (!o->pickaxe[0])
139+
return 0;
140+
134141
if (o->flags.allow_textconv) {
135142
textconv_one = get_textconv(p->one);
136143
textconv_two = get_textconv(p->two);
@@ -226,20 +233,22 @@ void diffcore_pickaxe(struct diff_options *o)
226233
cflags |= REG_ICASE;
227234
regcomp_or_die(&regex, needle, cflags);
228235
regexp = &regex;
229-
} else if (o->pickaxe_opts & DIFF_PICKAXE_IGNORE_CASE &&
230-
has_non_ascii(needle)) {
231-
struct strbuf sb = STRBUF_INIT;
232-
int cflags = REG_NEWLINE | REG_ICASE;
233-
234-
basic_regex_quote_buf(&sb, needle);
235-
regcomp_or_die(&regex, sb.buf, cflags);
236-
strbuf_release(&sb);
237-
regexp = &regex;
238-
} else {
239-
kws = kwsalloc(o->pickaxe_opts & DIFF_PICKAXE_IGNORE_CASE
240-
? tolower_trans_tbl : NULL);
241-
kwsincr(kws, needle, strlen(needle));
242-
kwsprep(kws);
236+
} else if (opts & DIFF_PICKAXE_KIND_S) {
237+
if (o->pickaxe_opts & DIFF_PICKAXE_IGNORE_CASE &&
238+
has_non_ascii(needle)) {
239+
struct strbuf sb = STRBUF_INIT;
240+
int cflags = REG_NEWLINE | REG_ICASE;
241+
242+
basic_regex_quote_buf(&sb, needle);
243+
regcomp_or_die(&regex, sb.buf, cflags);
244+
strbuf_release(&sb);
245+
regexp = &regex;
246+
} else {
247+
kws = kwsalloc(o->pickaxe_opts & DIFF_PICKAXE_IGNORE_CASE
248+
? tolower_trans_tbl : NULL);
249+
kwsincr(kws, needle, strlen(needle));
250+
kwsprep(kws);
251+
}
243252
}
244253

245254
/* Might want to warn when both S and G are on; I don't care... */
@@ -248,7 +257,7 @@ void diffcore_pickaxe(struct diff_options *o)
248257

249258
if (regexp)
250259
regfree(regexp);
251-
else
260+
if (kws)
252261
kwsfree(kws);
253262
return;
254263
}

revision.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2412,6 +2412,9 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s
24122412
revs->diffopt.flags.follow_renames)
24132413
revs->diff = 1;
24142414

2415+
if (revs->diffopt.objfind)
2416+
revs->simplify_history = 0;
2417+
24152418
if (revs->topo_order)
24162419
revs->limited = 1;
24172420

t/t4064-diff-oidfind.sh

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
#!/bin/sh
2+
3+
test_description='test finding specific blobs in the revision walking'
4+
. ./test-lib.sh
5+
6+
test_expect_success 'setup ' '
7+
git commit --allow-empty -m "empty initial commit" &&
8+
9+
echo "Hello, world!" >greeting &&
10+
git add greeting &&
11+
git commit -m "add the greeting blob" && # borrowed from Git from the Bottom Up
12+
git tag -m "the blob" greeting $(git rev-parse HEAD:greeting) &&
13+
14+
echo asdf >unrelated &&
15+
git add unrelated &&
16+
git commit -m "unrelated history" &&
17+
18+
git revert HEAD^ &&
19+
20+
git commit --allow-empty -m "another unrelated commit"
21+
'
22+
23+
test_expect_success 'find the greeting blob' '
24+
cat >expect <<-EOF &&
25+
Revert "add the greeting blob"
26+
add the greeting blob
27+
EOF
28+
29+
git log --format=%s --find-object=greeting^{blob} >actual &&
30+
31+
test_cmp expect actual
32+
'
33+
34+
test_expect_success 'setup a tree' '
35+
mkdir a &&
36+
echo asdf >a/file &&
37+
git add a/file &&
38+
git commit -m "add a file in a subdirectory"
39+
'
40+
41+
test_expect_success 'find a tree' '
42+
cat >expect <<-EOF &&
43+
add a file in a subdirectory
44+
EOF
45+
46+
git log --format=%s -t --find-object=HEAD:a >actual &&
47+
48+
test_cmp expect actual
49+
'
50+
51+
test_expect_success 'setup a submodule' '
52+
test_create_repo sub &&
53+
test_commit -C sub sub &&
54+
git submodule add ./sub sub &&
55+
git commit -a -m "add sub"
56+
'
57+
58+
test_expect_success 'find a submodule' '
59+
cat >expect <<-EOF &&
60+
add sub
61+
EOF
62+
63+
git log --format=%s --find-object=HEAD:sub >actual &&
64+
65+
test_cmp expect actual
66+
'
67+
68+
test_done

0 commit comments

Comments
 (0)