Skip to content

Commit 66ec237

Browse files
committed
Merge branch 'ab/fsck-skiplist'
Update fsck.skipList implementation and documentation. * ab/fsck-skiplist: fsck: support comments & empty lines in skipList fsck: use oidset instead of oid_array for skipList fsck: use strbuf_getline() to read skiplist file fsck: add a performance test for skipList fsck: add a performance test fsck: document that skipList input must be unabbreviated fsck: document and test commented & empty line skipList input fsck: document and test sorted skipList input fsck tests: add a test for no skipList input fsck tests: setup of bogus commit object
2 parents 468b322 + 371a655 commit 66ec237

File tree

6 files changed

+171
-45
lines changed

6 files changed

+171
-45
lines changed

Documentation/config.txt

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1567,12 +1567,16 @@ doing the same for `receive.fsck.<msg-id>` and `fetch.fsck.<msg-id>`
15671567
will only cause git to warn.
15681568

15691569
fsck.skipList::
1570-
The path to a sorted list of object names (i.e. one SHA-1 per
1570+
The path to a list of object names (i.e. one unabbreviated SHA-1 per
15711571
line) that are known to be broken in a non-fatal way and should
1572-
be ignored. This feature is useful when an established project
1573-
should be accepted despite early commits containing errors that
1574-
can be safely ignored such as invalid committer email addresses.
1575-
Note: corrupt objects cannot be skipped with this setting.
1572+
be ignored. On versions of Git 2.20 and later comments ('#'), empty
1573+
lines, and any leading and trailing whitespace is ignored. Everything
1574+
but a SHA-1 per line will error out on older versions.
1575+
+
1576+
This feature is useful when an established project should be accepted
1577+
despite early commits containing errors that can be safely ignored
1578+
such as invalid committer email addresses. Note: corrupt objects
1579+
cannot be skipped with this setting.
15761580
+
15771581
Like `fsck.<msg-id>` this variable has corresponding
15781582
`receive.fsck.skipList` and `fetch.fsck.skipList` variants.
@@ -1582,6 +1586,15 @@ Unlike variables like `color.ui` and `core.editor` the
15821586
fall back on the `fsck.skipList` configuration if they aren't set. To
15831587
uniformly configure the same fsck settings in different circumstances
15841588
all three of them they must all set to the same values.
1589+
+
1590+
Older versions of Git (before 2.20) documented that the object names
1591+
list should be sorted. This was never a requirement, the object names
1592+
could appear in any order, but when reading the list we tracked whether
1593+
the list was sorted for the purposes of an internal binary search
1594+
implementation, which could save itself some work with an already sorted
1595+
list. Unless you had a humongous list there was no reason to go out of
1596+
your way to pre-sort the list. After Git version 2.20 a hash implementation
1597+
is used instead, so there's now no reason to pre-sort the list.
15851598

15861599
gc.aggressiveDepth::
15871600
The depth parameter used in the delta compression

fsck.c

Lines changed: 27 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
#include "fsck.h"
1111
#include "refs.h"
1212
#include "utf8.h"
13-
#include "sha1-array.h"
1413
#include "decorate.h"
1514
#include "oidset.h"
1615
#include "packfile.h"
@@ -184,40 +183,37 @@ static int fsck_msg_type(enum fsck_msg_id msg_id,
184183

185184
static void init_skiplist(struct fsck_options *options, const char *path)
186185
{
187-
static struct oid_array skiplist = OID_ARRAY_INIT;
188-
int sorted, fd;
189-
char buffer[GIT_MAX_HEXSZ + 1];
186+
FILE *fp;
187+
struct strbuf sb = STRBUF_INIT;
190188
struct object_id oid;
191189

192-
if (options->skiplist)
193-
sorted = options->skiplist->sorted;
194-
else {
195-
sorted = 1;
196-
options->skiplist = &skiplist;
197-
}
198-
199-
fd = open(path, O_RDONLY);
200-
if (fd < 0)
190+
fp = fopen(path, "r");
191+
if (!fp)
201192
die("Could not open skip list: %s", path);
202-
for (;;) {
193+
while (!strbuf_getline(&sb, fp)) {
203194
const char *p;
204-
int result = read_in_full(fd, buffer, sizeof(buffer));
205-
if (result < 0)
206-
die_errno("Could not read '%s'", path);
207-
if (!result)
208-
break;
209-
if (parse_oid_hex(buffer, &oid, &p) || *p != '\n')
210-
die("Invalid SHA-1: %s", buffer);
211-
oid_array_append(&skiplist, &oid);
212-
if (sorted && skiplist.nr > 1 &&
213-
oidcmp(&skiplist.oid[skiplist.nr - 2],
214-
&oid) > 0)
215-
sorted = 0;
216-
}
217-
close(fd);
195+
const char *hash;
218196

219-
if (sorted)
220-
skiplist.sorted = 1;
197+
/*
198+
* Allow trailing comments, leading whitespace
199+
* (including before commits), and empty or whitespace
200+
* only lines.
201+
*/
202+
hash = strchr(sb.buf, '#');
203+
if (hash)
204+
strbuf_setlen(&sb, hash - sb.buf);
205+
strbuf_trim(&sb);
206+
if (!sb.len)
207+
continue;
208+
209+
if (parse_oid_hex(sb.buf, &oid, &p) || *p != '\0')
210+
die("Invalid SHA-1: %s", sb.buf);
211+
oidset_insert(&options->skiplist, &oid);
212+
}
213+
if (ferror(fp))
214+
die_errno("Could not read '%s'", path);
215+
fclose(fp);
216+
strbuf_release(&sb);
221217
}
222218

223219
static int parse_msg_type(const char *str)
@@ -322,9 +318,7 @@ static void append_msg_id(struct strbuf *sb, const char *msg_id)
322318

323319
static int object_on_skiplist(struct fsck_options *opts, struct object *obj)
324320
{
325-
if (opts && opts->skiplist && obj)
326-
return oid_array_lookup(opts->skiplist, &obj->oid) >= 0;
327-
return 0;
321+
return opts && obj && oidset_contains(&opts->skiplist, &obj->oid);
328322
}
329323

330324
__attribute__((format (printf, 4, 5)))

fsck.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
#ifndef GIT_FSCK_H
22
#define GIT_FSCK_H
33

4+
#include "oidset.h"
5+
46
#define FSCK_ERROR 1
57
#define FSCK_WARN 2
68
#define FSCK_IGNORE 3
@@ -35,12 +37,12 @@ struct fsck_options {
3537
fsck_error error_func;
3638
unsigned strict:1;
3739
int *msg_type;
38-
struct oid_array *skiplist;
40+
struct oidset skiplist;
3941
struct decoration *object_names;
4042
};
4143

42-
#define FSCK_OPTIONS_DEFAULT { NULL, fsck_error_function, 0, NULL }
43-
#define FSCK_OPTIONS_STRICT { NULL, fsck_error_function, 1, NULL }
44+
#define FSCK_OPTIONS_DEFAULT { NULL, fsck_error_function, 0, NULL, OIDSET_INIT }
45+
#define FSCK_OPTIONS_STRICT { NULL, fsck_error_function, 1, NULL, OIDSET_INIT }
4446

4547
/* descend in all linked child objects
4648
* the return value is:

t/perf/p1450-fsck.sh

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#!/bin/sh
2+
3+
test_description='Test fsck performance'
4+
5+
. ./perf-lib.sh
6+
7+
test_perf_large_repo
8+
9+
test_perf 'fsck' '
10+
git fsck
11+
'
12+
13+
test_done

t/perf/p1451-fsck-skip-list.sh

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
#!/bin/sh
2+
3+
test_description='Test fsck skipList performance'
4+
5+
. ./perf-lib.sh
6+
7+
test_perf_fresh_repo
8+
9+
n=1000000
10+
11+
test_expect_success "setup $n bad commits" '
12+
for i in $(test_seq 1 $n)
13+
do
14+
echo "commit refs/heads/master" &&
15+
echo "committer C <[email protected]> 1234567890 +0000" &&
16+
echo "data <<EOF" &&
17+
echo "$i.Q." &&
18+
echo "EOF"
19+
done | q_to_nul | git fast-import
20+
'
21+
22+
skip=0
23+
while test $skip -le $n
24+
do
25+
test_expect_success "create skipList for $skip bad commits" '
26+
git log --format=%H --max-count=$skip |
27+
sort >skiplist
28+
'
29+
30+
test_perf "fsck with $skip skipped bad commits" '
31+
git -c fsck.skipList=skiplist fsck
32+
'
33+
34+
case $skip in
35+
0) skip=1 ;;
36+
*) skip=${skip}0 ;;
37+
esac
38+
done
39+
40+
test_done

t/t5504-fetch-receive-strict.sh

Lines changed: 68 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,34 @@ committer Bugs Bunny <[email protected]> 1234567890 +0000
133133
This commit object intentionally broken
134134
EOF
135135

136+
test_expect_success 'setup bogus commit' '
137+
commit="$(git hash-object -t commit -w --stdin <bogus-commit)"
138+
'
139+
140+
test_expect_success 'fsck with no skipList input' '
141+
test_must_fail git fsck 2>err &&
142+
test_i18ngrep "missingEmail" err
143+
'
144+
145+
test_expect_success 'setup sorted and unsorted skipLists' '
146+
cat >SKIP.unsorted <<-EOF &&
147+
0000000000000000000000000000000000000004
148+
0000000000000000000000000000000000000002
149+
$commit
150+
0000000000000000000000000000000000000001
151+
0000000000000000000000000000000000000003
152+
EOF
153+
sort SKIP.unsorted >SKIP.sorted
154+
'
155+
156+
test_expect_success 'fsck with sorted skipList' '
157+
git -c fsck.skipList=SKIP.sorted fsck
158+
'
159+
160+
test_expect_success 'fsck with unsorted skipList' '
161+
git -c fsck.skipList=SKIP.unsorted fsck
162+
'
163+
136164
test_expect_success 'fsck with invalid or bogus skipList input' '
137165
git -c fsck.skipList=/dev/null -c fsck.missingEmail=ignore fsck &&
138166
test_must_fail git -c fsck.skipList=does-not-exist -c fsck.missingEmail=ignore fsck 2>err &&
@@ -141,8 +169,47 @@ test_expect_success 'fsck with invalid or bogus skipList input' '
141169
test_i18ngrep "Invalid SHA-1: \[core\]" err
142170
'
143171

172+
test_expect_success 'fsck with other accepted skipList input (comments & empty lines)' '
173+
cat >SKIP.with-comment <<-EOF &&
174+
# Some bad commit
175+
0000000000000000000000000000000000000001
176+
EOF
177+
test_must_fail git -c fsck.skipList=SKIP.with-comment fsck 2>err-with-comment &&
178+
test_i18ngrep "missingEmail" err-with-comment &&
179+
cat >SKIP.with-empty-line <<-EOF &&
180+
0000000000000000000000000000000000000001
181+
182+
0000000000000000000000000000000000000002
183+
EOF
184+
test_must_fail git -c fsck.skipList=SKIP.with-empty-line fsck 2>err-with-empty-line &&
185+
test_i18ngrep "missingEmail" err-with-empty-line
186+
'
187+
188+
test_expect_success 'fsck no garbage output from comments & empty lines errors' '
189+
test_line_count = 1 err-with-comment &&
190+
test_line_count = 1 err-with-empty-line
191+
'
192+
193+
test_expect_success 'fsck with invalid abbreviated skipList input' '
194+
echo $commit | test_copy_bytes 20 >SKIP.abbreviated &&
195+
test_must_fail git -c fsck.skipList=SKIP.abbreviated fsck 2>err-abbreviated &&
196+
test_i18ngrep "^fatal: Invalid SHA-1: " err-abbreviated
197+
'
198+
199+
test_expect_success 'fsck with exhaustive accepted skipList input (various types of comments etc.)' '
200+
>SKIP.exhaustive &&
201+
echo "# A commented line" >>SKIP.exhaustive &&
202+
echo "" >>SKIP.exhaustive &&
203+
echo " " >>SKIP.exhaustive &&
204+
echo " # Comment after whitespace" >>SKIP.exhaustive &&
205+
echo "$commit # Our bad commit (with leading whitespace and trailing comment)" >>SKIP.exhaustive &&
206+
echo "# Some bad commit (leading whitespace)" >>SKIP.exhaustive &&
207+
echo " 0000000000000000000000000000000000000001" >>SKIP.exhaustive &&
208+
git -c fsck.skipList=SKIP.exhaustive fsck 2>err &&
209+
test_must_be_empty err
210+
'
211+
144212
test_expect_success 'push with receive.fsck.skipList' '
145-
commit="$(git hash-object -t commit -w --stdin <bogus-commit)" &&
146213
git push . $commit:refs/heads/bogus &&
147214
rm -rf dst &&
148215
git init dst &&
@@ -169,7 +236,6 @@ test_expect_success 'push with receive.fsck.skipList' '
169236
'
170237

171238
test_expect_success 'fetch with fetch.fsck.skipList' '
172-
commit="$(git hash-object -t commit -w --stdin <bogus-commit)" &&
173239
refspec=refs/heads/bogus:refs/heads/bogus &&
174240
git push . $commit:refs/heads/bogus &&
175241
rm -rf dst &&
@@ -204,7 +270,6 @@ test_expect_success 'fsck.<unknownmsg-id> dies' '
204270
'
205271

206272
test_expect_success 'push with receive.fsck.missingEmail=warn' '
207-
commit="$(git hash-object -t commit -w --stdin <bogus-commit)" &&
208273
git push . $commit:refs/heads/bogus &&
209274
rm -rf dst &&
210275
git init dst &&
@@ -232,7 +297,6 @@ test_expect_success 'push with receive.fsck.missingEmail=warn' '
232297
'
233298

234299
test_expect_success 'fetch with fetch.fsck.missingEmail=warn' '
235-
commit="$(git hash-object -t commit -w --stdin <bogus-commit)" &&
236300
refspec=refs/heads/bogus:refs/heads/bogus &&
237301
git push . $commit:refs/heads/bogus &&
238302
rm -rf dst &&

0 commit comments

Comments
 (0)