Skip to content

Commit 3b41fb0

Browse files
rscharfe authored and gitster committed
fsck: use oidset instead of oid_array for skipList
Change the implementation of the skipList feature to use oidset instead of oid_array to store SHA-1s for later lookup.

This list is parsed once on startup by fsck, fetch-pack or receive-pack depending on the *.skipList config in use. I.e. only once per invocation, but note that for "clone --recurse-submodules" each submodule will re-parse the list, in addition to the main project, and it will be re-parsed when checking .gitmodules blobs, see fb16287 ("fsck: check skiplist for object in fsck_blob()", 2018-06-27).

Memory usage is a bit higher, but we don't need to keep track of the sort order anymore. Embed the oidset into struct fsck_options to make its ownership clear (no hidden sharing) and avoid unnecessary pointer indirection.

The cumulative impact on performance of this & the preceding change, using the test setup described in the previous commit:

    Test                                             HEAD~2            HEAD~                    HEAD
    ----------------------------------------------------------------------------------------------------------------
    1450.3: fsck with 0 skipped bad commits          7.70(7.31+0.38)   7.72(7.33+0.38) +0.3%    7.70(7.30+0.40) +0.0%
    1450.5: fsck with 1 skipped bad commits          7.84(7.47+0.37)   7.69(7.32+0.36) -1.9%    7.71(7.29+0.41) -1.7%
    1450.7: fsck with 10 skipped bad commits         7.81(7.40+0.40)   7.94(7.57+0.36) +1.7%    7.92(7.55+0.37) +1.4%
    1450.9: fsck with 100 skipped bad commits        7.81(7.42+0.38)   7.95(7.53+0.41) +1.8%    7.83(7.42+0.41) +0.3%
    1450.11: fsck with 1000 skipped bad commits      7.99(7.62+0.36)   7.90(7.50+0.40) -1.1%    7.86(7.49+0.37) -1.6%
    1450.13: fsck with 10000 skipped bad commits     7.98(7.57+0.40)   7.94(7.53+0.40) -0.5%    7.90(7.45+0.44) -1.0%
    1450.15: fsck with 100000 skipped bad commits    7.97(7.57+0.39)   8.03(7.67+0.36) +0.8%    7.84(7.43+0.41) -1.6%
    1450.17: fsck with 1000000 skipped bad commits   7.72(7.22+0.50)   7.28(7.07+0.20) -5.7%    7.13(6.87+0.25) -7.6%

Helped-by: Ævar Arnfjörð Bjarmason <[email protected]>
Signed-off-by: Rene Scharfe <[email protected]>
Signed-off-by: Ævar Arnfjörð Bjarmason <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent fb89520 commit 3b41fb0

File tree

3 files changed

+13
-29
lines changed

3 files changed

+13
-29
lines changed

Documentation/config.txt

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1731,11 +1731,12 @@ all three of them they must all set to the same values.
17311731
+
17321732
Older versions of Git (before 2.20) documented that the object names
17331733
list should be sorted. This was never a requirement, the object names
1734-
can appear in any order, but when reading the list we track whether
1735-
the list is sorted for the purposes of an internal binary search
1736-
implementation, which can save itself some work with an already sorted
1737-
list. Unless you have a humongous list there's no reason to go out of
1738-
your way to pre-sort the list.
1734+
could appear in any order, but when reading the list we tracked whether
1735+
the list was sorted for the purposes of an internal binary search
1736+
implementation, which could save itself some work with an already sorted
1737+
list. Unless you had a humongous list there was no reason to go out of
1738+
your way to pre-sort the list. After Git version 2.20 a hash implementation
1739+
is used instead, so there's now no reason to pre-sort the list.
17391740

17401741
gc.aggressiveDepth::
17411742
The depth parameter used in the delta compression

fsck.c

Lines changed: 2 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
#include "fsck.h"
1111
#include "refs.h"
1212
#include "utf8.h"
13-
#include "sha1-array.h"
1413
#include "decorate.h"
1514
#include "oidset.h"
1615
#include "packfile.h"
@@ -182,39 +181,23 @@ static int fsck_msg_type(enum fsck_msg_id msg_id,
182181

183182
static void init_skiplist(struct fsck_options *options, const char *path)
184183
{
185-
static struct oid_array skiplist = OID_ARRAY_INIT;
186-
int sorted;
187184
FILE *fp;
188185
struct strbuf sb = STRBUF_INIT;
189186
struct object_id oid;
190187

191-
if (options->skiplist)
192-
sorted = options->skiplist->sorted;
193-
else {
194-
sorted = 1;
195-
options->skiplist = &skiplist;
196-
}
197-
198188
fp = fopen(path, "r");
199189
if (!fp)
200190
die("Could not open skip list: %s", path);
201191
while (!strbuf_getline(&sb, fp)) {
202192
const char *p;
203193
if (parse_oid_hex(sb.buf, &oid, &p) || *p != '\0')
204194
die("Invalid SHA-1: %s", sb.buf);
205-
oid_array_append(&skiplist, &oid);
206-
if (sorted && skiplist.nr > 1 &&
207-
oidcmp(&skiplist.oid[skiplist.nr - 2],
208-
&oid) > 0)
209-
sorted = 0;
195+
oidset_insert(&options->skiplist, &oid);
210196
}
211197
if (ferror(fp))
212198
die_errno("Could not read '%s'", path);
213199
fclose(fp);
214200
strbuf_release(&sb);
215-
216-
if (sorted)
217-
skiplist.sorted = 1;
218201
}
219202

220203
static int parse_msg_type(const char *str)
@@ -319,9 +302,7 @@ static void append_msg_id(struct strbuf *sb, const char *msg_id)
319302

320303
static int object_on_skiplist(struct fsck_options *opts, struct object *obj)
321304
{
322-
if (opts && opts->skiplist && obj)
323-
return oid_array_lookup(opts->skiplist, &obj->oid) >= 0;
324-
return 0;
305+
return opts && obj && oidset_contains(&opts->skiplist, &obj->oid);
325306
}
326307

327308
__attribute__((format (printf, 4, 5)))

fsck.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
#ifndef GIT_FSCK_H
22
#define GIT_FSCK_H
33

4+
#include "oidset.h"
5+
46
#define FSCK_ERROR 1
57
#define FSCK_WARN 2
68
#define FSCK_IGNORE 3
@@ -35,12 +37,12 @@ struct fsck_options {
3537
fsck_error error_func;
3638
unsigned strict:1;
3739
int *msg_type;
38-
struct oid_array *skiplist;
40+
struct oidset skiplist;
3941
struct decoration *object_names;
4042
};
4143

42-
#define FSCK_OPTIONS_DEFAULT { NULL, fsck_error_function, 0, NULL }
43-
#define FSCK_OPTIONS_STRICT { NULL, fsck_error_function, 1, NULL }
44+
#define FSCK_OPTIONS_DEFAULT { NULL, fsck_error_function, 0, NULL, OIDSET_INIT }
45+
#define FSCK_OPTIONS_STRICT { NULL, fsck_error_function, 1, NULL, OIDSET_INIT }
4446

4547
/* descend in all linked child objects
4648
* the return value is:

0 commit comments

Comments
 (0)