Skip to content

Commit 58ecb2e

Browse files
committed
Merge branch 'tb/gc-recent-object-hook'
"git pack-objects" learned to invoke a new hook program that enumerates extra objects to be used as anchoring points to keep otherwise unreachable objects in cruft packs.

* tb/gc-recent-object-hook:
  gc: introduce `gc.recentObjectsHook`
  reachable.c: extract `obj_is_recent()`
2 parents 891e631 + 4dc16e2 commit 58ecb2e

File tree

5 files changed

+313
-3
lines changed

5 files changed

+313
-3
lines changed

Documentation/config/gc.txt

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,21 @@ or rebase occurring. Since these changes are not part of the current
130130
project most users will want to expire them sooner, which is why the
131131
default is more aggressive than `gc.reflogExpire`.
132132

133+
gc.recentObjectsHook::
134+
When considering whether or not to remove an object (either when
135+
generating a cruft pack or storing unreachable objects as
136+
loose), use the shell to execute the specified command(s).
137+
Interpret their output as object IDs which Git will consider as
138+
"recent", regardless of their age. By treating their mtimes as
139+
"now", any objects (and their descendants) mentioned in the
140+
output will be kept regardless of their true age.
141+
+
142+
Output must contain exactly one hex object ID per line, and nothing
143+
else. Objects which cannot be found in the repository are ignored.
144+
Multiple hooks are supported, but all must exit successfully, else the
145+
operation (either generating a cruft pack or unpacking unreachable
146+
objects) will be halted.
147+
133148
gc.rerereResolved::
134149
Records of conflicted merge you resolved earlier are
135150
kept for this many days when 'git rerere gc' is run.

reachable.c

Lines changed: 82 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
#include "object-store.h"
1717
#include "pack-bitmap.h"
1818
#include "pack-mtimes.h"
19+
#include "config.h"
20+
#include "run-command.h"
1921

2022
struct connectivity_progress {
2123
struct progress *progress;
@@ -67,8 +69,77 @@ struct recent_data {
6769
timestamp_t timestamp;
6870
report_recent_object_fn *cb;
6971
int ignore_in_core_kept_packs;
72+
73+
struct oidset extra_recent_oids;
74+
int extra_recent_oids_loaded;
7075
};
7176

77+
/*
 * Run one configured hook command and collect the object IDs it prints.
 *
 * `args` is pushed as the only argument of a child process that is run
 * through the shell. Each line read back from the child must parse as a
 * hex object ID with nothing after it; every such ID is inserted into
 * `set`. The first line that does not parse is reported via error() and
 * ends the reading loop; IDs inserted before that line stay in `set`.
 *
 * Returns -1 if start_command() fails. Otherwise returns the bitwise OR
 * of the parse result (0, or whatever error() returned) and the value of
 * finish_command(), so the result is 0 only when both are 0.
 */
static int run_one_gc_recent_objects_hook(struct oidset *set,
78+
const char *args)
79+
{
80+
struct child_process cmd = CHILD_PROCESS_INIT;
81+
struct strbuf buf = STRBUF_INIT;
82+
FILE *out;
83+
int ret = 0;
84+
85+
cmd.use_shell = 1;
86+
/*
 * NOTE(review): presumably -1 asks run-command to create a pipe for the
 * child's stdout and store its read end in cmd.out; confirm against
 * run-command.h. The field is used as a readable descriptor below.
 */
cmd.out = -1;
87+
88+
strvec_push(&cmd.args, args);
89+
90+
/* Nothing has been allocated or opened yet, so a plain return is enough. */
if (start_command(&cmd))
91+
return -1;
92+
93+
out = xfdopen(cmd.out, "r");
94+
while (strbuf_getline(&buf, out) != EOF) {
95+
struct object_id oid;
96+
const char *rest;
97+
98+
/* Reject lines that are not a hex object ID or that have trailing text. */
if (parse_oid_hex(buf.buf, &oid, &rest) || *rest) {
99+
ret = error(_("invalid extra cruft tip: '%s'"), buf.buf);
100+
break;
101+
}
102+
103+
oidset_insert(set, &oid);
104+
}
105+
106+
fclose(out);
107+
/* Fold the command's result into any parse error recorded above. */
ret |= finish_command(&cmd);
108+
109+
strbuf_release(&buf);
110+
return ret;
111+
}
112+
113+
/*
 * Fill data->extra_recent_oids from every command configured under
 * "gc.recentobjectshook".
 *
 * The commands are run one after another in the order stored in the
 * config value list. If any run returns non-zero, the process dies with
 * "unable to enumerate additional recent objects".
 */
static void load_gc_recent_objects(struct recent_data *data)
114+
{
115+
const struct string_list *programs;
116+
int ret = 0;
117+
size_t i;
118+
119+
/*
 * Set the flag before doing any work: obj_is_recent() checks it to
 * decide whether to call this function, so it is set even when the
 * config lookup below returns early.
 */
data->extra_recent_oids_loaded = 1;
120+
121+
/*
 * A non-zero return leaves the set untouched; presumably it means the
 * key is not configured (confirm against config.h).
 */
if (git_config_get_string_multi("gc.recentobjectshook", &programs))
122+
return;
123+
124+
for (i = 0; i < programs->nr; i++) {
125+
ret = run_one_gc_recent_objects_hook(&data->extra_recent_oids,
126+
programs->items[i].string);
127+
if (ret)
128+
die(_("unable to enumerate additional recent objects"));
129+
}
130+
}
131+
132+
/*
 * Decide whether an object counts as "recent".
 *
 * Returns 1 when `mtime` is strictly greater than data->timestamp.
 * Otherwise returns the result of looking `oid` up in
 * data->extra_recent_oids, which is populated on first need by
 * load_gc_recent_objects().
 */
static int obj_is_recent(const struct object_id *oid, timestamp_t mtime,
133+
struct recent_data *data)
134+
{
135+
if (mtime > data->timestamp)
136+
return 1;
137+
138+
/* Load lazily: nothing is run while every object passes the mtime check. */
if (!data->extra_recent_oids_loaded)
139+
load_gc_recent_objects(data);
140+
return oidset_contains(&data->extra_recent_oids, oid);
141+
}
142+
72143
static void add_recent_object(const struct object_id *oid,
73144
struct packed_git *pack,
74145
off_t offset,
@@ -78,7 +149,7 @@ static void add_recent_object(const struct object_id *oid,
78149
struct object *obj;
79150
enum object_type type;
80151

81-
if (mtime <= data->timestamp)
152+
if (!obj_is_recent(oid, mtime, data))
82153
return;
83154

84155
/*
@@ -193,16 +264,24 @@ int add_unseen_recent_objects_to_traversal(struct rev_info *revs,
193264
data.cb = cb;
194265
data.ignore_in_core_kept_packs = ignore_in_core_kept_packs;
195266

267+
oidset_init(&data.extra_recent_oids, 0);
268+
data.extra_recent_oids_loaded = 0;
269+
196270
r = for_each_loose_object(add_recent_loose, &data,
197271
FOR_EACH_OBJECT_LOCAL_ONLY);
198272
if (r)
199-
return r;
273+
goto done;
200274

201275
flags = FOR_EACH_OBJECT_LOCAL_ONLY | FOR_EACH_OBJECT_PACK_ORDER;
202276
if (ignore_in_core_kept_packs)
203277
flags |= FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS;
204278

205-
return for_each_packed_object(add_recent_packed, &data, flags);
279+
r = for_each_packed_object(add_recent_packed, &data, flags);
280+
281+
done:
282+
oidset_clear(&data.extra_recent_oids);
283+
284+
return r;
206285
}
207286

208287
static int mark_object_seen(const struct object_id *oid,

t/t5304-prune.sh

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -350,4 +350,18 @@ test_expect_success 'old reachable-from-recent retained with bitmaps' '
350350
test_must_fail git cat-file -e $to_drop
351351
'
352352

353+
# Check that git prune keeps an object named by gc.recentObjectsHook.
#
# The loose file at $BLOB_FILE gets its mtime changed with
# test-tool chmtime =-86500, a hook script that echoes $BLOB is configured,
# and after git prune --expire=now the blob must still be readable.
# NOTE(review): add_blob is defined elsewhere in this script; presumably it
# creates the blob and sets BLOB and BLOB_FILE. Confirm against the helper.
test_expect_success 'gc.recentObjectsHook' '
354+
add_blob &&
355+
test-tool chmtime =-86500 $BLOB_FILE &&
356+
357+
write_script precious-objects <<-EOF &&
358+
echo $BLOB
359+
EOF
360+
test_config gc.recentObjectsHook ./precious-objects &&
361+
362+
git prune --expire=now &&
363+
364+
git cat-file -p $BLOB
365+
'
366+
353367
test_done

t/t5329-pack-objects-cruft.sh

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -739,4 +739,175 @@ test_expect_success 'cruft objects are freshend via loose' '
739739
)
740740
'
741741

742+
# Check that gc.recentObjectsHook rescues an expired unreachable commit
# into the cruft pack, and that the rescue ends when the hook is unset.
#
# First repack: the hook echoes cruft.old, and the cruft pack must hold
# exactly the objects listed by rev-list for cruft.old and cruft.new.
# Second repack: the hook is unset, the cruft pack must hold only the
# objects listed for cruft.new, and every object that dropped out of the
# pack must no longer be readable with git cat-file.
test_expect_success 'gc.recentObjectsHook' '
743+
git init repo &&
744+
test_when_finished "rm -fr repo" &&
745+
(
746+
cd repo &&
747+
748+
# Create a handful of objects.
749+
#
750+
# - one reachable commit, "base", designated for the reachable
751+
# pack
752+
# - one unreachable commit, "cruft.discard", which is marked
753+
# for deletion
754+
# - one unreachable commit, "cruft.old", which would be marked
755+
# for deletion, but is rescued as an extra cruft tip
756+
# - one unreachable commit, "cruft.new", which is not marked
757+
# for deletion
758+
test_commit base &&
759+
git branch -M main &&
760+
761+
git checkout --orphan discard &&
762+
git rm -fr . &&
763+
test_commit --no-tag cruft.discard &&
764+
765+
git checkout --orphan old &&
766+
git rm -fr . &&
767+
test_commit --no-tag cruft.old &&
768+
cruft_old="$(git rev-parse HEAD)" &&
769+
770+
git checkout --orphan new &&
771+
git rm -fr . &&
772+
test_commit --no-tag cruft.new &&
773+
cruft_new="$(git rev-parse HEAD)" &&
774+
775+
git checkout main &&
776+
git branch -D discard old new &&
777+
git reflog expire --all --expire=all &&
778+
779+
# mark cruft.old with an mtime that is many minutes
780+
# older than the expiration period, and mark cruft.new
781+
# with an mtime that is in the future (and thus not
782+
# eligible for pruning).
783+
test-tool chmtime -2000 "$objdir/$(test_oid_to_path $cruft_old)" &&
784+
test-tool chmtime +1000 "$objdir/$(test_oid_to_path $cruft_new)" &&
785+
786+
# Write the list of cruft objects we expect to
787+
# accumulate, which is comprised of everything reachable
788+
# from cruft.old and cruft.new, but not cruft.discard.
789+
git rev-list --objects --no-object-names \
790+
$cruft_old $cruft_new >cruft.raw &&
791+
sort cruft.raw >cruft.expect &&
792+
793+
# Write the script to list extra tips, which are limited
794+
# to cruft.old, in this case.
795+
write_script extra-tips <<-EOF &&
796+
echo $cruft_old
797+
EOF
798+
git config gc.recentObjectsHook ./extra-tips &&
799+
800+
git repack --cruft --cruft-expiration=now -d &&
801+
802+
mtimes="$(ls .git/objects/pack/pack-*.mtimes)" &&
803+
git show-index <${mtimes%.mtimes}.idx >cruft &&
804+
cut -d" " -f2 cruft | sort >cruft.actual &&
805+
test_cmp cruft.expect cruft.actual &&
806+
807+
# Ensure that the "old" objects are removed after
808+
# dropping the gc.recentObjectsHook hook.
809+
git config --unset gc.recentObjectsHook &&
810+
git repack --cruft --cruft-expiration=now -d &&
811+
812+
mtimes="$(ls .git/objects/pack/pack-*.mtimes)" &&
813+
git show-index <${mtimes%.mtimes}.idx >cruft &&
814+
cut -d" " -f2 cruft | sort >cruft.actual &&
815+
816+
git rev-list --objects --no-object-names $cruft_new >cruft.raw &&
817+
cp cruft.expect cruft.old &&
818+
sort cruft.raw >cruft.expect &&
819+
test_cmp cruft.expect cruft.actual &&
820+
821+
# ensure objects which are no longer in the cruft pack were
822+
# removed from the repository
823+
for object in $(comm -13 cruft.expect cruft.old)
824+
do
825+
test_must_fail git cat-file -t $object || return 1
826+
done
827+
)
828+
'
829+
830+
# Check gc.recentObjectsHook when it is configured more than once.
#
# With two hooks that each echo one unreachable commit, the cruft pack
# must hold exactly the objects listed by rev-list for both commits.
# After a third hook that runs "false" is added, git repack must fail,
# its stderr must contain "unable to enumerate additional recent objects",
# and the .mtimes file recorded from the earlier repack must still exist.
test_expect_success 'multi-valued gc.recentObjectsHook' '
831+
git init repo &&
832+
test_when_finished "rm -fr repo" &&
833+
(
834+
cd repo &&
835+
836+
test_commit base &&
837+
git branch -M main &&
838+
839+
git checkout --orphan cruft.a &&
840+
git rm -fr . &&
841+
test_commit --no-tag cruft.a &&
842+
cruft_a="$(git rev-parse HEAD)" &&
843+
844+
git checkout --orphan cruft.b &&
845+
git rm -fr . &&
846+
test_commit --no-tag cruft.b &&
847+
cruft_b="$(git rev-parse HEAD)" &&
848+
849+
git checkout main &&
850+
git branch -D cruft.a cruft.b &&
851+
git reflog expire --all --expire=all &&
852+
853+
echo "echo $cruft_a" | write_script extra-tips.a &&
854+
echo "echo $cruft_b" | write_script extra-tips.b &&
855+
echo "false" | write_script extra-tips.c &&
856+
857+
git rev-list --objects --no-object-names $cruft_a $cruft_b \
858+
>cruft.raw &&
859+
sort cruft.raw >cruft.expect &&
860+
861+
# ensure that each extra cruft tip is saved by its
862+
# respective hook
863+
git config --add gc.recentObjectsHook ./extra-tips.a &&
864+
git config --add gc.recentObjectsHook ./extra-tips.b &&
865+
git repack --cruft --cruft-expiration=now -d &&
866+
867+
mtimes="$(ls .git/objects/pack/pack-*.mtimes)" &&
868+
git show-index <${mtimes%.mtimes}.idx >cruft &&
869+
cut -d" " -f2 cruft | sort >cruft.actual &&
870+
test_cmp cruft.expect cruft.actual &&
871+
872+
# ensure that a dirty exit halts cruft pack generation
873+
git config --add gc.recentObjectsHook ./extra-tips.c &&
874+
test_must_fail git repack --cruft --cruft-expiration=now -d 2>err &&
875+
grep "unable to enumerate additional recent objects" err &&
876+
877+
# and that the existing cruft pack is left alone
878+
test_path_is_file "$mtimes"
879+
)
880+
'
881+
882+
# Check that the hook can name a bare blob, not only a commit.
#
# An unreachable blob is written with git hash-object, its loose file
# mtime is changed with test-tool chmtime -2000, and a hook that echoes
# the blob ID is configured. After the cruft repack, the cruft pack index
# must list exactly that one blob.
test_expect_success 'additional cruft blobs via gc.recentObjectsHook' '
883+
git init repo &&
884+
test_when_finished "rm -fr repo" &&
885+
(
886+
cd repo &&
887+
888+
test_commit base &&
889+
890+
blob=$(echo "unreachable" | git hash-object -w --stdin) &&
891+
892+
# mark the unreachable blob we wrote above as having
893+
# aged out of the retention period
894+
test-tool chmtime -2000 "$objdir/$(test_oid_to_path $blob)" &&
895+
896+
# Write the script to list extra tips, which is just the
897+
# extra blob as above.
898+
write_script extra-tips <<-EOF &&
899+
echo $blob
900+
EOF
901+
git config gc.recentObjectsHook ./extra-tips &&
902+
903+
git repack --cruft --cruft-expiration=now -d &&
904+
905+
mtimes="$(ls .git/objects/pack/pack-*.mtimes)" &&
906+
git show-index <${mtimes%.mtimes}.idx >cruft &&
907+
cut -d" " -f2 cruft >actual &&
908+
echo $blob >expect &&
909+
test_cmp expect actual
910+
)
911+
'
912+
742913
test_done

t/t7701-repack-unpack-unreachable.sh

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,37 @@ test_expect_success 'do not bother loosening old objects' '
113113
test_must_fail git cat-file -p $obj2
114114
'
115115

116+
# Check gc.recentObjectsHook with git repack -A -d --unpack-unreachable.
#
# Three blobs are packed into three separate packs, and an annotated tag
# obj2-tag is created pointing at obj2. The hook echoes the tag object ID.
# The packs holding obj2 and obj3 get their mtime changed with
# test-tool chmtime =-86400, then the repack runs with
# --unpack-unreachable=1.hour.ago. Afterwards obj1, obj2 and the tag
# object must be readable, and obj3 must not be.
test_expect_success 'gc.recentObjectsHook' '
117+
obj1=$(echo one | git hash-object -w --stdin) &&
118+
obj2=$(echo two | git hash-object -w --stdin) &&
119+
obj3=$(echo three | git hash-object -w --stdin) &&
120+
pack1=$(echo $obj1 | git pack-objects .git/objects/pack/pack) &&
121+
pack2=$(echo $obj2 | git pack-objects .git/objects/pack/pack) &&
122+
pack3=$(echo $obj3 | git pack-objects .git/objects/pack/pack) &&
123+
git prune-packed &&
124+
125+
git cat-file -p $obj1 &&
126+
git cat-file -p $obj2 &&
127+
git cat-file -p $obj3 &&
128+
129+
git tag -a -m tag obj2-tag $obj2 &&
130+
obj2_tag="$(git rev-parse obj2-tag)" &&
131+
132+
write_script precious-objects <<-EOF &&
133+
echo $obj2_tag
134+
EOF
135+
git config gc.recentObjectsHook ./precious-objects &&
136+
137+
test-tool chmtime =-86400 .git/objects/pack/pack-$pack2.pack &&
138+
test-tool chmtime =-86400 .git/objects/pack/pack-$pack3.pack &&
139+
git repack -A -d --unpack-unreachable=1.hour.ago &&
140+
141+
git cat-file -p $obj1 &&
142+
git cat-file -p $obj2 &&
143+
git cat-file -p $obj2_tag &&
144+
test_must_fail git cat-file -p $obj3
145+
'
146+
116147
test_expect_success 'keep packed objects found only in index' '
117148
echo my-unique-content >file &&
118149
git add file &&

0 commit comments

Comments
 (0)