Skip to content

Commit d55a30b

Browse files
peff authored and gitster committed
prune: lazily perform reachability traversal
The general strategy of "git prune" is to do a full reachability walk, then for each loose object see if we found it in our walk. But if we don't have any loose objects, we don't need to do the expensive walk in the first place.

This patch postpones that walk until the first time we need to see its results.

Note that this is really a specific case of a more general optimization, which is that we could traverse only far enough to find the object under consideration (i.e., stop the traversal when we find it, then pick up again when asked about the next object, etc). That could save us in some instances from having to do a full walk. But it's actually a bit tricky to do with our traversal code, and you'd need to do a full walk anyway if you have even a single unreachable object (which you generally do, if any objects are actually left after running git-repack).

So in practice this lazy-load of the full walk catches one easy but common case (i.e., you've just repacked via git-gc, and there's nothing unreachable).

The perf script is fairly contrived, but it does show off the improvement:

  Test                            HEAD^             HEAD
  -------------------------------------------------------------------------
  5304.4: prune with no objects   3.66(3.60+0.05)   0.00(0.00+0.00) -100.0%

and would let us know if we accidentally regress this optimization.

Note also that we need to take special care with prune_shallow(), which relies on us having performed the traversal. So this optimization can only kick in for a non-shallow repository. Since this is easy to get wrong and is not covered by existing tests, let's add an extra test to t5304 that covers this case explicitly.

Signed-off-by: Jeff King <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent 2d08f3d commit d55a30b

File tree

3 files changed

+68
-11
lines changed

3 files changed

+68
-11
lines changed

builtin/prune.c

Lines changed: 32 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -31,16 +31,40 @@ static int prune_tmp_file(const char *fullpath)
3131
return 0;
3232
}
3333

34-
static int prune_object(const struct object_id *oid, const char *fullpath,
35-
void *data)
34+
static void perform_reachability_traversal(struct rev_info *revs)
3635
{
37-
struct stat st;
36+
static int initialized;
37+
struct progress *progress = NULL;
38+
39+
if (initialized)
40+
return;
41+
42+
if (show_progress)
43+
progress = start_delayed_progress(_("Checking connectivity"), 0);
44+
mark_reachable_objects(revs, 1, expire, progress);
45+
stop_progress(&progress);
46+
initialized = 1;
47+
}
48+
49+
static int is_object_reachable(const struct object_id *oid,
50+
struct rev_info *revs)
51+
{
52+
perform_reachability_traversal(revs);
3853

3954
/*
4055
* Do we know about this object?
4156
* It must have been reachable
4257
*/
43-
if (lookup_object(the_repository, oid->hash))
58+
return !!lookup_object(the_repository, oid->hash);
59+
}
60+
61+
static int prune_object(const struct object_id *oid, const char *fullpath,
62+
void *data)
63+
{
64+
struct rev_info *revs = data;
65+
struct stat st;
66+
67+
if (is_object_reachable(oid, revs))
4468
return 0;
4569

4670
if (lstat(fullpath, &st)) {
@@ -102,7 +126,6 @@ static void remove_temporary_files(const char *path)
102126
int cmd_prune(int argc, const char **argv, const char *prefix)
103127
{
104128
struct rev_info revs;
105-
struct progress *progress = NULL;
106129
int exclude_promisor_objects = 0;
107130
const struct option options[] = {
108131
OPT__DRY_RUN(&show_only, N_("do not remove, show only")),
@@ -142,26 +165,24 @@ int cmd_prune(int argc, const char **argv, const char *prefix)
142165

143166
if (show_progress == -1)
144167
show_progress = isatty(2);
145-
if (show_progress)
146-
progress = start_delayed_progress(_("Checking connectivity"), 0);
147168
if (exclude_promisor_objects) {
148169
fetch_if_missing = 0;
149170
revs.exclude_promisor_objects = 1;
150171
}
151172

152-
mark_reachable_objects(&revs, 1, expire, progress);
153-
stop_progress(&progress);
154173
for_each_loose_file_in_objdir(get_object_directory(), prune_object,
155-
prune_cruft, prune_subdir, NULL);
174+
prune_cruft, prune_subdir, &revs);
156175

157176
prune_packed_objects(show_only ? PRUNE_PACKED_DRY_RUN : 0);
158177
remove_temporary_files(get_object_directory());
159178
s = mkpathdup("%s/pack", get_object_directory());
160179
remove_temporary_files(s);
161180
free(s);
162181

163-
if (is_repository_shallow(the_repository))
182+
if (is_repository_shallow(the_repository)) {
183+
perform_reachability_traversal(&revs);
164184
prune_shallow(show_only ? PRUNE_SHOW_ONLY : 0);
185+
}
165186

166187
return 0;
167188
}

t/perf/p5304-prune.sh

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#!/bin/sh
2+
3+
test_description='performance tests of prune'
4+
. ./perf-lib.sh
5+
6+
test_perf_default_repo
7+
8+
test_expect_success 'remove reachable loose objects' '
9+
git repack -ad
10+
'
11+
12+
test_expect_success 'remove unreachable loose objects' '
13+
git prune
14+
'
15+
16+
test_expect_success 'confirm there are no loose objects' '
17+
git count-objects | grep ^0
18+
'
19+
20+
test_perf 'prune with no objects' '
21+
git prune
22+
'
23+
24+
test_done

t/t5304-prune.sh

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,18 @@ test_expect_success 'prune .git/shallow' '
274274
test_path_is_missing .git/shallow
275275
'
276276

277+
test_expect_success 'prune .git/shallow when there are no loose objects' '
278+
SHA1=$(echo hi|git commit-tree HEAD^{tree}) &&
279+
echo $SHA1 >.git/shallow &&
280+
git update-ref refs/heads/shallow-tip $SHA1 &&
281+
git repack -ad &&
282+
# verify assumption that all loose objects are gone
283+
git count-objects | grep ^0 &&
284+
git prune &&
285+
echo $SHA1 >expect &&
286+
test_cmp expect .git/shallow
287+
'
288+
277289
test_expect_success 'prune: handle alternate object database' '
278290
test_create_repo A &&
279291
git -C A commit --allow-empty -m "initial commit" &&

0 commit comments

Comments
 (0)