Skip to content

Commit 2871f4d

Browse files
jerry-skydio authored and gitster committed
builtin: patch-id: add --verbatim as a command mode
There are situations where the user might not want the default setting where patch-id strips all whitespace. They might be working in a language where whitespace is syntactically important, or they might have CI testing that enforces strict whitespace linting. In these cases, a whitespace change would result in the patch fundamentally changing, and thus deserving of a different id.

Add a new mode that is exclusive of --stable and --unstable called --verbatim. It also corresponds to the config patchid.verbatim = true. In this mode, the stable algorithm is used and whitespace is not stripped from the patch text.

Users of --unstable mainly care about compatibility with old git versions, which preserving the whitespace would break. Thus there isn't a use case for the combination of --verbatim and --unstable, and we don't expose this so as to not add maintenance burden.

Signed-off-by: Jerry Zhang <[email protected]>
fixes https://github.com/Skydio/revup/issues/2
Signed-off-by: Junio C Hamano <[email protected]>
1 parent 93105ab commit 2871f4d

File tree

3 files changed

+124
-39
lines changed

3 files changed

+124
-39
lines changed

Documentation/git-patch-id.txt

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,18 +8,18 @@ git-patch-id - Compute unique ID for a patch
88
SYNOPSIS
99
--------
1010
[verse]
11-
'git patch-id' [--stable | --unstable]
11+
'git patch-id' [--stable | --unstable | --verbatim]
1212

1313
DESCRIPTION
1414
-----------
1515
Read a patch from the standard input and compute the patch ID for it.
1616

1717
A "patch ID" is nothing but a sum of SHA-1 of the file diffs associated with a
18-
patch, with whitespace and line numbers ignored. As such, it's "reasonably
19-
stable", but at the same time also reasonably unique, i.e., two patches that
20-
have the same "patch ID" are almost guaranteed to be the same thing.
18+
patch, with line numbers ignored. As such, it's "reasonably stable", but at
19+
the same time also reasonably unique, i.e., two patches that have the same
20+
"patch ID" are almost guaranteed to be the same thing.
2121

22-
IOW, you can use this thing to look for likely duplicate commits.
22+
The main use case for this command is to look for likely duplicate commits.
2323

2424
When dealing with 'git diff-tree' output, it takes advantage of
2525
the fact that the patch is prefixed with the object name of the
@@ -30,6 +30,12 @@ This can be used to make a mapping from patch ID to commit ID.
3030
OPTIONS
3131
-------
3232

33+
--verbatim::
34+
Calculate the patch-id of the input as it is given; do not strip
35+
any whitespace.
36+
37+
This is the default if patchid.verbatim is true.
38+
3339
--stable::
3440
Use a "stable" sum of hashes as the patch ID. With this option:
3541
- Reordering file diffs that make up a patch does not affect the ID.
@@ -45,14 +51,16 @@ OPTIONS
4551
of "-O<orderfile>", thereby making existing databases storing such
4652
"unstable" or historical patch-ids unusable.
4753

54+
- All whitespace within the patch is ignored and does not affect the id.
55+
4856
This is the default if patchid.stable is set to true.
4957

5058
--unstable::
5159
Use an "unstable" hash as the patch ID. With this option,
5260
the result produced is compatible with the patch-id value produced
53-
by git 1.9 and older. Users with pre-existing databases storing
54-
patch-ids produced by git 1.9 and older (who do not deal with reordered
55-
patches) may want to use this option.
61+
by git 1.9 and older, and whitespace is ignored. Users with pre-existing
62+
databases storing patch-ids produced by git 1.9 and older (who do not deal
63+
with reordered patches) may want to use this option.
5664

5765
This is the default.
5866

builtin/patch-id.c

Lines changed: 48 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#include "builtin.h"
33
#include "config.h"
44
#include "diff.h"
5+
#include "parse-options.h"
56

67
static void flush_current_id(int patchlen, struct object_id *id, struct object_id *result)
78
{
@@ -57,7 +58,7 @@ static int scan_hunk_header(const char *p, int *p_before, int *p_after)
5758
}
5859

5960
static int get_one_patchid(struct object_id *next_oid, struct object_id *result,
60-
struct strbuf *line_buf, int stable)
61+
struct strbuf *line_buf, int stable, int verbatim)
6162
{
6263
int patchlen = 0, found_next = 0;
6364
int before = -1, after = -1;
@@ -76,8 +77,11 @@ static int get_one_patchid(struct object_id *next_oid, struct object_id *result,
7677
if (!skip_prefix(line, "diff-tree ", &p) &&
7778
!skip_prefix(line, "commit ", &p) &&
7879
!skip_prefix(line, "From ", &p) &&
79-
starts_with(line, "\\ ") && 12 < strlen(line))
80+
starts_with(line, "\\ ") && 12 < strlen(line)) {
81+
if (verbatim)
82+
the_hash_algo->update_fn(&ctx, line, strlen(line));
8083
continue;
84+
}
8185

8286
if (!get_oid_hex(p, next_oid)) {
8387
found_next = 1;
@@ -152,8 +156,8 @@ static int get_one_patchid(struct object_id *next_oid, struct object_id *result,
152156
if (line[0] == '+' || line[0] == ' ')
153157
after--;
154158

155-
/* Compute the sha without whitespace */
156-
len = remove_space(line);
159+
/* Add line to hash algo (possibly removing whitespace) */
160+
len = verbatim ? strlen(line) : remove_space(line);
157161
patchlen += len;
158162
the_hash_algo->update_fn(&ctx, line, len);
159163
}
@@ -166,29 +170,40 @@ static int get_one_patchid(struct object_id *next_oid, struct object_id *result,
166170
return patchlen;
167171
}
168172

169-
static void generate_id_list(int stable)
173+
static void generate_id_list(int stable, int verbatim)
170174
{
171175
struct object_id oid, n, result;
172176
int patchlen;
173177
struct strbuf line_buf = STRBUF_INIT;
174178

175179
oidclr(&oid);
176180
while (!feof(stdin)) {
177-
patchlen = get_one_patchid(&n, &result, &line_buf, stable);
181+
patchlen = get_one_patchid(&n, &result, &line_buf, stable, verbatim);
178182
flush_current_id(patchlen, &oid, &result);
179183
oidcpy(&oid, &n);
180184
}
181185
strbuf_release(&line_buf);
182186
}
183187

184-
static const char patch_id_usage[] = "git patch-id [--stable | --unstable]";
188+
static const char *const patch_id_usage[] = {
189+
N_("git patch-id [--stable | --unstable | --verbatim]"), NULL
190+
};
191+
192+
struct patch_id_opts {
193+
int stable;
194+
int verbatim;
195+
};
185196

186197
static int git_patch_id_config(const char *var, const char *value, void *cb)
187198
{
188-
int *stable = cb;
199+
struct patch_id_opts *opts = cb;
189200

190201
if (!strcmp(var, "patchid.stable")) {
191-
*stable = git_config_bool(var, value);
202+
opts->stable = git_config_bool(var, value);
203+
return 0;
204+
}
205+
if (!strcmp(var, "patchid.verbatim")) {
206+
opts->verbatim = git_config_bool(var, value);
192207
return 0;
193208
}
194209

@@ -197,21 +212,29 @@ static int git_patch_id_config(const char *var, const char *value, void *cb)
197212

198213
int cmd_patch_id(int argc, const char **argv, const char *prefix)
199214
{
200-
int stable = -1;
201-
202-
git_config(git_patch_id_config, &stable);
203-
204-
/* If nothing is set, default to unstable. */
205-
if (stable < 0)
206-
stable = 0;
207-
208-
if (argc == 2 && !strcmp(argv[1], "--stable"))
209-
stable = 1;
210-
else if (argc == 2 && !strcmp(argv[1], "--unstable"))
211-
stable = 0;
212-
else if (argc != 1)
213-
usage(patch_id_usage);
214-
215-
generate_id_list(stable);
215+
/* if nothing is set, default to unstable */
216+
struct patch_id_opts config = {0, 0};
217+
int opts = 0;
218+
struct option builtin_patch_id_options[] = {
219+
OPT_CMDMODE(0, "unstable", &opts,
220+
N_("use the unstable patch-id algorithm"), 1),
221+
OPT_CMDMODE(0, "stable", &opts,
222+
N_("use the stable patch-id algorithm"), 2),
223+
OPT_CMDMODE(0, "verbatim", &opts,
224+
N_("don't strip whitespace from the patch"), 3),
225+
OPT_END()
226+
};
227+
228+
git_config(git_patch_id_config, &config);
229+
230+
/* verbatim implies stable */
231+
if (config.verbatim)
232+
config.stable = 1;
233+
234+
argc = parse_options(argc, argv, prefix, builtin_patch_id_options,
235+
patch_id_usage, 0);
236+
237+
generate_id_list(opts ? opts > 1 : config.stable,
238+
opts ? opts == 3 : config.verbatim);
216239
return 0;
217240
}

t/t4204-patch-id.sh

Lines changed: 60 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,22 +8,37 @@ export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
88
. ./test-lib.sh
99

1010
test_expect_success 'setup' '
11-
as="a a a a a a a a" && # eight a
12-
test_write_lines $as >foo &&
13-
test_write_lines $as >bar &&
11+
str="ab cd ef gh ij kl mn op" &&
12+
test_write_lines $str >foo &&
13+
test_write_lines $str >bar &&
1414
git add foo bar &&
1515
git commit -a -m initial &&
16-
test_write_lines $as b >foo &&
17-
test_write_lines $as b >bar &&
16+
test_write_lines $str b >foo &&
17+
test_write_lines $str b >bar &&
1818
git commit -a -m first &&
1919
git checkout -b same main &&
2020
git commit --amend -m same-msg &&
2121
git checkout -b notsame main &&
2222
echo c >foo &&
2323
echo c >bar &&
2424
git commit --amend -a -m notsame-msg &&
25+
git checkout -b with_space main~ &&
26+
cat >foo <<-\EOF &&
27+
a b
28+
c d
29+
e f
30+
g h
31+
i j
32+
k l
33+
m n
34+
op
35+
EOF
36+
cp foo bar &&
37+
git add foo bar &&
38+
git commit --amend -m "with spaces" &&
2539
test_write_lines bar foo >bar-then-foo &&
2640
test_write_lines foo bar >foo-then-bar
41+
2742
'
2843

2944
test_expect_success 'patch-id output is well-formed' '
@@ -128,9 +143,21 @@ test_patch_id_file_order () {
128143
git format-patch -1 --stdout -O foo-then-bar >format-patch.output &&
129144
calc_patch_id <format-patch.output "ordered-$name" "$@" &&
130145
cmp_patch_id $relevant "$name" "ordered-$name"
146+
}
131147

148+
test_patch_id_whitespace () {
149+
relevant="$1"
150+
shift
151+
name="ws-${1}-$relevant"
152+
shift
153+
get_top_diff "main~" >top-diff.output &&
154+
calc_patch_id <top-diff.output "$name" "$@" &&
155+
get_top_diff "with_space" >top-diff.output &&
156+
calc_patch_id <top-diff.output "ws-$name" "$@" &&
157+
cmp_patch_id $relevant "$name" "ws-$name"
132158
}
133159

160+
134161
# combined test for options: add more tests here to make them
135162
# run with all options
136163
test_patch_id () {
@@ -146,6 +173,14 @@ test_expect_success 'file order is relevant with --unstable' '
146173
test_patch_id_file_order relevant --unstable --unstable
147174
'
148175

176+
test_expect_success 'whitespace is relevant with --verbatim' '
177+
test_patch_id_whitespace relevant --verbatim --verbatim
178+
'
179+
180+
test_expect_success 'whitespace is irrelevant without --verbatim' '
181+
test_patch_id_whitespace irrelevant --stable --stable
182+
'
183+
149184
#Now test various option combinations.
150185
test_expect_success 'default is unstable' '
151186
test_patch_id relevant default
@@ -161,6 +196,17 @@ test_expect_success 'patchid.stable = false is unstable' '
161196
test_patch_id relevant patchid.stable=false
162197
'
163198

199+
test_expect_success 'patchid.verbatim = true is correct and stable' '
200+
test_config patchid.verbatim true &&
201+
test_patch_id_whitespace relevant patchid.verbatim=true &&
202+
test_patch_id irrelevant patchid.verbatim=true
203+
'
204+
205+
test_expect_success 'patchid.verbatim = false is unstable' '
206+
test_config patchid.verbatim false &&
207+
test_patch_id relevant patchid.verbatim=false
208+
'
209+
164210
test_expect_success '--unstable overrides patchid.stable = true' '
165211
test_config patchid.stable true &&
166212
test_patch_id relevant patchid.stable=true--unstable --unstable
@@ -171,6 +217,11 @@ test_expect_success '--stable overrides patchid.stable = false' '
171217
test_patch_id irrelevant patchid.stable=false--stable --stable
172218
'
173219

220+
test_expect_success '--verbatim overrides patchid.stable = false' '
221+
test_config patchid.stable false &&
222+
test_patch_id_whitespace relevant stable=false--verbatim --verbatim
223+
'
224+
174225
test_expect_success 'patch-id supports git-format-patch MIME output' '
175226
get_patch_id main &&
176227
git checkout same &&
@@ -225,7 +276,10 @@ test_expect_success 'patch-id handles no-nl-at-eof markers' '
225276
EOF
226277
calc_patch_id nonl <nonl &&
227278
calc_patch_id withnl <withnl &&
228-
test_cmp patch-id_nonl patch-id_withnl
279+
test_cmp patch-id_nonl patch-id_withnl &&
280+
calc_patch_id nonl-inc-ws --verbatim <nonl &&
281+
calc_patch_id withnl-inc-ws --verbatim <withnl &&
282+
! test_cmp patch-id_nonl-inc-ws patch-id_withnl-inc-ws
229283
'
230284

231285
test_expect_success 'patch-id handles diffs with one line of before/after' '

0 commit comments

Comments
 (0)