Skip to content

Commit 160314e

Browse files
committed
Merge branch 'jz/patch-id'
A new "--include-whitespace" option is added to "git patch-id", and existing bugs in the internal patch-id logic that did not match what "git patch-id" produces have been corrected. * jz/patch-id: builtin: patch-id: remove unused diff-tree prefix builtin: patch-id: add --verbatim as a command mode patch-id: fix patch-id for mode changes builtin: patch-id: fix patch-id with binary diffs patch-id: use stable patch-id for rebases patch-id: fix stable patch id for binary / header-only
2 parents 63bba4f + 0d32ae8 commit 160314e

File tree

9 files changed

+287
-99
lines changed

9 files changed

+287
-99
lines changed

Documentation/git-patch-id.txt

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,18 +8,18 @@ git-patch-id - Compute unique ID for a patch
88
SYNOPSIS
99
--------
1010
[verse]
11-
'git patch-id' [--stable | --unstable]
11+
'git patch-id' [--stable | --unstable | --verbatim]
1212

1313
DESCRIPTION
1414
-----------
1515
Read a patch from the standard input and compute the patch ID for it.
1616

1717
A "patch ID" is nothing but a sum of SHA-1 of the file diffs associated with a
18-
patch, with whitespace and line numbers ignored. As such, it's "reasonably
19-
stable", but at the same time also reasonably unique, i.e., two patches that
20-
have the same "patch ID" are almost guaranteed to be the same thing.
18+
patch, with line numbers ignored. As such, it's "reasonably stable", but at
19+
the same time also reasonably unique, i.e., two patches that have the same
20+
"patch ID" are almost guaranteed to be the same thing.
2121

22-
IOW, you can use this thing to look for likely duplicate commits.
22+
The main usecase for this command is to look for likely duplicate commits.
2323

2424
When dealing with 'git diff-tree' output, it takes advantage of
2525
the fact that the patch is prefixed with the object name of the
@@ -30,6 +30,12 @@ This can be used to make a mapping from patch ID to commit ID.
3030
OPTIONS
3131
-------
3232

33+
--verbatim::
34+
Calculate the patch-id of the input as it is given, do not strip
35+
any whitespace.
36+
37+
This is the default if patchid.verbatim is true.
38+
3339
--stable::
3440
Use a "stable" sum of hashes as the patch ID. With this option:
3541
- Reordering file diffs that make up a patch does not affect the ID.
@@ -45,14 +51,16 @@ OPTIONS
4551
of "-O<orderfile>", thereby making existing databases storing such
4652
"unstable" or historical patch-ids unusable.
4753

54+
- All whitespace within the patch is ignored and does not affect the id.
55+
4856
This is the default if patchid.stable is set to true.
4957

5058
--unstable::
5159
Use an "unstable" hash as the patch ID. With this option,
5260
the result produced is compatible with the patch-id value produced
53-
by git 1.9 and older. Users with pre-existing databases storing
54-
patch-ids produced by git 1.9 and older (who do not deal with reordered
55-
patches) may want to use this option.
61+
by git 1.9 and older and whitespace is ignored. Users with pre-existing
62+
databases storing patch-ids produced by git 1.9 and older (who do not deal
63+
with reordered patches) may want to use this option.
5664

5765
This is the default.
5866

builtin/log.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1763,7 +1763,7 @@ static void prepare_bases(struct base_tree_info *bases,
17631763
struct object_id *patch_id;
17641764
if (*commit_base_at(&commit_base, commit))
17651765
continue;
1766-
if (commit_patch_id(commit, &diffopt, &oid, 0, 1))
1766+
if (commit_patch_id(commit, &diffopt, &oid, 0))
17671767
die(_("cannot get patch id"));
17681768
ALLOC_GROW(bases->patch_id, bases->nr_patch_id + 1, bases->alloc_patch_id);
17691769
patch_id = bases->patch_id + bases->nr_patch_id;

builtin/patch-id.c

Lines changed: 84 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#include "builtin.h"
33
#include "config.h"
44
#include "diff.h"
5+
#include "parse-options.h"
56

67
static void flush_current_id(int patchlen, struct object_id *id, struct object_id *result)
78
{
@@ -57,10 +58,12 @@ static int scan_hunk_header(const char *p, int *p_before, int *p_after)
5758
}
5859

5960
static int get_one_patchid(struct object_id *next_oid, struct object_id *result,
60-
struct strbuf *line_buf, int stable)
61+
struct strbuf *line_buf, int stable, int verbatim)
6162
{
6263
int patchlen = 0, found_next = 0;
6364
int before = -1, after = -1;
65+
int diff_is_binary = 0;
66+
char pre_oid_str[GIT_MAX_HEXSZ + 1], post_oid_str[GIT_MAX_HEXSZ + 1];
6467
git_hash_ctx ctx;
6568

6669
the_hash_algo->init_fn(&ctx);
@@ -71,11 +74,14 @@ static int get_one_patchid(struct object_id *next_oid, struct object_id *result,
7174
const char *p = line;
7275
int len;
7376

74-
if (!skip_prefix(line, "diff-tree ", &p) &&
75-
!skip_prefix(line, "commit ", &p) &&
77+
/* Possibly skip over the prefix added by "log" or "format-patch" */
78+
if (!skip_prefix(line, "commit ", &p) &&
7679
!skip_prefix(line, "From ", &p) &&
77-
starts_with(line, "\\ ") && 12 < strlen(line))
80+
starts_with(line, "\\ ") && 12 < strlen(line)) {
81+
if (verbatim)
82+
the_hash_algo->update_fn(&ctx, line, strlen(line));
7883
continue;
84+
}
7985

8086
if (!get_oid_hex(p, next_oid)) {
8187
found_next = 1;
@@ -88,14 +94,44 @@ static int get_one_patchid(struct object_id *next_oid, struct object_id *result,
8894

8995
/* Parsing diff header? */
9096
if (before == -1) {
91-
if (starts_with(line, "index "))
97+
if (starts_with(line, "GIT binary patch") ||
98+
starts_with(line, "Binary files")) {
99+
diff_is_binary = 1;
100+
before = 0;
101+
the_hash_algo->update_fn(&ctx, pre_oid_str,
102+
strlen(pre_oid_str));
103+
the_hash_algo->update_fn(&ctx, post_oid_str,
104+
strlen(post_oid_str));
105+
if (stable)
106+
flush_one_hunk(result, &ctx);
107+
continue;
108+
} else if (skip_prefix(line, "index ", &p)) {
109+
char *oid1_end = strstr(line, "..");
110+
char *oid2_end = NULL;
111+
if (oid1_end)
112+
oid2_end = strstr(oid1_end, " ");
113+
if (!oid2_end)
114+
oid2_end = line + strlen(line) - 1;
115+
if (oid1_end != NULL && oid2_end != NULL) {
116+
*oid1_end = *oid2_end = '\0';
117+
strlcpy(pre_oid_str, p, GIT_MAX_HEXSZ + 1);
118+
strlcpy(post_oid_str, oid1_end + 2, GIT_MAX_HEXSZ + 1);
119+
}
92120
continue;
93-
else if (starts_with(line, "--- "))
121+
} else if (starts_with(line, "--- "))
94122
before = after = 1;
95123
else if (!isalpha(line[0]))
96124
break;
97125
}
98126

127+
if (diff_is_binary) {
128+
if (starts_with(line, "diff ")) {
129+
diff_is_binary = 0;
130+
before = -1;
131+
}
132+
continue;
133+
}
134+
99135
/* Looking for a valid hunk header? */
100136
if (before == 0 && after == 0) {
101137
if (starts_with(line, "@@ -")) {
@@ -120,8 +156,8 @@ static int get_one_patchid(struct object_id *next_oid, struct object_id *result,
120156
if (line[0] == '+' || line[0] == ' ')
121157
after--;
122158

123-
/* Compute the sha without whitespace */
124-
len = remove_space(line);
159+
/* Add line to hash algo (possibly removing whitespace) */
160+
len = verbatim ? strlen(line) : remove_space(line);
125161
patchlen += len;
126162
the_hash_algo->update_fn(&ctx, line, len);
127163
}
@@ -134,29 +170,40 @@ static int get_one_patchid(struct object_id *next_oid, struct object_id *result,
134170
return patchlen;
135171
}
136172

137-
static void generate_id_list(int stable)
173+
static void generate_id_list(int stable, int verbatim)
138174
{
139175
struct object_id oid, n, result;
140176
int patchlen;
141177
struct strbuf line_buf = STRBUF_INIT;
142178

143179
oidclr(&oid);
144180
while (!feof(stdin)) {
145-
patchlen = get_one_patchid(&n, &result, &line_buf, stable);
181+
patchlen = get_one_patchid(&n, &result, &line_buf, stable, verbatim);
146182
flush_current_id(patchlen, &oid, &result);
147183
oidcpy(&oid, &n);
148184
}
149185
strbuf_release(&line_buf);
150186
}
151187

152-
static const char patch_id_usage[] = "git patch-id [--stable | --unstable]";
188+
static const char *const patch_id_usage[] = {
189+
N_("git patch-id [--stable | --unstable | --verbatim]"), NULL
190+
};
191+
192+
struct patch_id_opts {
193+
int stable;
194+
int verbatim;
195+
};
153196

154197
static int git_patch_id_config(const char *var, const char *value, void *cb)
155198
{
156-
int *stable = cb;
199+
struct patch_id_opts *opts = cb;
157200

158201
if (!strcmp(var, "patchid.stable")) {
159-
*stable = git_config_bool(var, value);
202+
opts->stable = git_config_bool(var, value);
203+
return 0;
204+
}
205+
if (!strcmp(var, "patchid.verbatim")) {
206+
opts->verbatim = git_config_bool(var, value);
160207
return 0;
161208
}
162209

@@ -165,21 +212,29 @@ static int git_patch_id_config(const char *var, const char *value, void *cb)
165212

166213
int cmd_patch_id(int argc, const char **argv, const char *prefix)
167214
{
168-
int stable = -1;
169-
170-
git_config(git_patch_id_config, &stable);
171-
172-
/* If nothing is set, default to unstable. */
173-
if (stable < 0)
174-
stable = 0;
175-
176-
if (argc == 2 && !strcmp(argv[1], "--stable"))
177-
stable = 1;
178-
else if (argc == 2 && !strcmp(argv[1], "--unstable"))
179-
stable = 0;
180-
else if (argc != 1)
181-
usage(patch_id_usage);
182-
183-
generate_id_list(stable);
215+
/* if nothing is set, default to unstable */
216+
struct patch_id_opts config = {0, 0};
217+
int opts = 0;
218+
struct option builtin_patch_id_options[] = {
219+
OPT_CMDMODE(0, "unstable", &opts,
220+
N_("use the unstable patch-id algorithm"), 1),
221+
OPT_CMDMODE(0, "stable", &opts,
222+
N_("use the stable patch-id algorithm"), 2),
223+
OPT_CMDMODE(0, "verbatim", &opts,
224+
N_("don't strip whitespace from the patch"), 3),
225+
OPT_END()
226+
};
227+
228+
git_config(git_patch_id_config, &config);
229+
230+
/* verbatim implies stable */
231+
if (config.verbatim)
232+
config.stable = 1;
233+
234+
argc = parse_options(argc, argv, prefix, builtin_patch_id_options,
235+
patch_id_usage, 0);
236+
237+
generate_id_list(opts ? opts > 1 : config.stable,
238+
opts ? opts == 3 : config.verbatim);
184239
return 0;
185240
}

diff.c

Lines changed: 38 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -6229,7 +6229,7 @@ static void patch_id_add_mode(git_hash_ctx *ctx, unsigned mode)
62296229
}
62306230

62316231
/* returns 0 upon success, and writes result into oid */
6232-
static int diff_get_patch_id(struct diff_options *options, struct object_id *oid, int diff_header_only, int stable)
6232+
static int diff_get_patch_id(struct diff_options *options, struct object_id *oid, int diff_header_only)
62336233
{
62346234
struct diff_queue_struct *q = &diff_queued_diff;
62356235
int i;
@@ -6276,61 +6276,62 @@ static int diff_get_patch_id(struct diff_options *options, struct object_id *oid
62766276
if (p->one->mode == 0) {
62776277
patch_id_add_string(&ctx, "newfilemode");
62786278
patch_id_add_mode(&ctx, p->two->mode);
6279-
patch_id_add_string(&ctx, "---/dev/null");
6280-
patch_id_add_string(&ctx, "+++b/");
6281-
the_hash_algo->update_fn(&ctx, p->two->path, len2);
62826279
} else if (p->two->mode == 0) {
62836280
patch_id_add_string(&ctx, "deletedfilemode");
62846281
patch_id_add_mode(&ctx, p->one->mode);
6285-
patch_id_add_string(&ctx, "---a/");
6286-
the_hash_algo->update_fn(&ctx, p->one->path, len1);
6287-
patch_id_add_string(&ctx, "+++/dev/null");
6288-
} else {
6289-
patch_id_add_string(&ctx, "---a/");
6290-
the_hash_algo->update_fn(&ctx, p->one->path, len1);
6291-
patch_id_add_string(&ctx, "+++b/");
6292-
the_hash_algo->update_fn(&ctx, p->two->path, len2);
6282+
} else if (p->one->mode != p->two->mode) {
6283+
patch_id_add_string(&ctx, "oldmode");
6284+
patch_id_add_mode(&ctx, p->one->mode);
6285+
patch_id_add_string(&ctx, "newmode");
6286+
patch_id_add_mode(&ctx, p->two->mode);
62936287
}
62946288

6295-
if (diff_header_only)
6296-
continue;
6297-
6298-
if (fill_mmfile(options->repo, &mf1, p->one) < 0 ||
6299-
fill_mmfile(options->repo, &mf2, p->two) < 0)
6300-
return error("unable to read files to diff");
6301-
6302-
if (diff_filespec_is_binary(options->repo, p->one) ||
6289+
if (diff_header_only) {
6290+
/* don't do anything since we're only populating header info */
6291+
} else if (diff_filespec_is_binary(options->repo, p->one) ||
63036292
diff_filespec_is_binary(options->repo, p->two)) {
63046293
the_hash_algo->update_fn(&ctx, oid_to_hex(&p->one->oid),
63056294
the_hash_algo->hexsz);
63066295
the_hash_algo->update_fn(&ctx, oid_to_hex(&p->two->oid),
63076296
the_hash_algo->hexsz);
6308-
continue;
6309-
}
6310-
6311-
xpp.flags = 0;
6312-
xecfg.ctxlen = 3;
6313-
xecfg.flags = XDL_EMIT_NO_HUNK_HDR;
6314-
if (xdi_diff_outf(&mf1, &mf2, NULL,
6315-
patch_id_consume, &data, &xpp, &xecfg))
6316-
return error("unable to generate patch-id diff for %s",
6317-
p->one->path);
6297+
} else {
6298+
if (p->one->mode == 0) {
6299+
patch_id_add_string(&ctx, "---/dev/null");
6300+
patch_id_add_string(&ctx, "+++b/");
6301+
the_hash_algo->update_fn(&ctx, p->two->path, len2);
6302+
} else if (p->two->mode == 0) {
6303+
patch_id_add_string(&ctx, "---a/");
6304+
the_hash_algo->update_fn(&ctx, p->one->path, len1);
6305+
patch_id_add_string(&ctx, "+++/dev/null");
6306+
} else {
6307+
patch_id_add_string(&ctx, "---a/");
6308+
the_hash_algo->update_fn(&ctx, p->one->path, len1);
6309+
patch_id_add_string(&ctx, "+++b/");
6310+
the_hash_algo->update_fn(&ctx, p->two->path, len2);
6311+
}
63186312

6319-
if (stable)
6320-
flush_one_hunk(oid, &ctx);
6313+
if (fill_mmfile(options->repo, &mf1, p->one) < 0 ||
6314+
fill_mmfile(options->repo, &mf2, p->two) < 0)
6315+
return error("unable to read files to diff");
6316+
xpp.flags = 0;
6317+
xecfg.ctxlen = 3;
6318+
xecfg.flags = XDL_EMIT_NO_HUNK_HDR;
6319+
if (xdi_diff_outf(&mf1, &mf2, NULL,
6320+
patch_id_consume, &data, &xpp, &xecfg))
6321+
return error("unable to generate patch-id diff for %s",
6322+
p->one->path);
6323+
}
6324+
flush_one_hunk(oid, &ctx);
63216325
}
63226326

6323-
if (!stable)
6324-
the_hash_algo->final_oid_fn(oid, &ctx);
6325-
63266327
return 0;
63276328
}
63286329

6329-
int diff_flush_patch_id(struct diff_options *options, struct object_id *oid, int diff_header_only, int stable)
6330+
int diff_flush_patch_id(struct diff_options *options, struct object_id *oid, int diff_header_only)
63306331
{
63316332
struct diff_queue_struct *q = &diff_queued_diff;
63326333
int i;
6333-
int result = diff_get_patch_id(options, oid, diff_header_only, stable);
6334+
int result = diff_get_patch_id(options, oid, diff_header_only);
63346335

63356336
for (i = 0; i < q->nr; i++)
63366337
diff_free_filepair(q->queue[i]);

diff.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -634,7 +634,7 @@ int run_diff_files(struct rev_info *revs, unsigned int option);
634634
int run_diff_index(struct rev_info *revs, unsigned int option);
635635

636636
int do_diff_cache(const struct object_id *, struct diff_options *);
637-
int diff_flush_patch_id(struct diff_options *, struct object_id *, int, int);
637+
int diff_flush_patch_id(struct diff_options *, struct object_id *, int);
638638
void flush_one_hunk(struct object_id *result, git_hash_ctx *ctx);
639639

640640
int diff_result_code(struct diff_options *, int);

0 commit comments

Comments
 (0)