Skip to content

Commit ad90968

Browse files
committed
Merge branch 'tb/repack-expire-to'

"git repack" learns to send cruft objects out of the way into packfiles outside the repository.

* tb/repack-expire-to:
  builtin/repack.c: implement `--expire-to` for storing pruned objects
  builtin/repack.c: write cruft packs to arbitrary locations
  builtin/repack.c: pass "cruft_expiration" to `write_cruft_pack`
  builtin/repack.c: pass "out" to `prepare_pack_objects`
2 parents e53598a + 91badeb commit ad90968

File tree

3 files changed

+188
-10
lines changed

3 files changed

+188
-10
lines changed

Documentation/git-repack.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,12 @@ to the new separate pack will be written.
7474
immediately instead of waiting for the next `git gc` invocation.
7575
Only useful with `--cruft -d`.
7676

77+
--expire-to=<dir>::
78+
Write a cruft pack containing pruned objects (if any) to the
79+
directory `<dir>`. This option is useful for keeping a copy of
80+
any pruned objects in a separate directory as a backup. Only
81+
useful with `--cruft -d`.
82+
7783
-l::
7884
Pass the `--local` option to 'git pack-objects'. See
7985
linkgit:git-pack-objects[1].

builtin/repack.c

Lines changed: 61 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@ static int write_bitmaps = -1;
3232
static int use_delta_islands;
3333
static int run_update_server_info = 1;
3434
static char *packdir, *packtmp_name, *packtmp;
35-
static char *cruft_expiration;
3635

3736
static const char *const git_repack_usage[] = {
3837
N_("git repack [<options>]"),
@@ -150,7 +149,8 @@ static void remove_redundant_pack(const char *dir_name, const char *base_name)
150149
}
151150

152151
static void prepare_pack_objects(struct child_process *cmd,
153-
const struct pack_objects_args *args)
152+
const struct pack_objects_args *args,
153+
const char *out)
154154
{
155155
strvec_push(&cmd->args, "pack-objects");
156156
if (args->window)
@@ -173,7 +173,7 @@ static void prepare_pack_objects(struct child_process *cmd,
173173
strvec_push(&cmd->args, "--quiet");
174174
if (delta_base_offset)
175175
strvec_push(&cmd->args, "--delta-base-offset");
176-
strvec_push(&cmd->args, packtmp);
176+
strvec_push(&cmd->args, out);
177177
cmd->git_cmd = 1;
178178
cmd->out = -1;
179179
}
@@ -241,7 +241,7 @@ static void repack_promisor_objects(const struct pack_objects_args *args,
241241
FILE *out;
242242
struct strbuf line = STRBUF_INIT;
243243

244-
prepare_pack_objects(&cmd, args);
244+
prepare_pack_objects(&cmd, args, packtmp);
245245
cmd.in = -1;
246246

247247
/*
@@ -657,7 +657,9 @@ static void remove_redundant_bitmaps(struct string_list *include,
657657
}
658658

659659
static int write_cruft_pack(const struct pack_objects_args *args,
660+
const char *destination,
660661
const char *pack_prefix,
662+
const char *cruft_expiration,
661663
struct string_list *names,
662664
struct string_list *existing_packs,
663665
struct string_list *existing_kept_packs)
@@ -667,8 +669,10 @@ static int write_cruft_pack(const struct pack_objects_args *args,
667669
struct string_list_item *item;
668670
FILE *in, *out;
669671
int ret;
672+
const char *scratch;
673+
int local = skip_prefix(destination, packdir, &scratch);
670674

671-
prepare_pack_objects(&cmd, args);
675+
prepare_pack_objects(&cmd, args, destination);
672676

673677
strvec_push(&cmd.args, "--cruft");
674678
if (cruft_expiration)
@@ -693,6 +697,10 @@ static int write_cruft_pack(const struct pack_objects_args *args,
693697
* By the time it is read here, it contains only the pack(s)
694698
* that were just written, which is exactly the set of packs we
695699
* want to consider kept.
700+
*
701+
* If `--expire-to` is given, the double-use served by `names`
702+
* ensures that the pack written to `--expire-to` excludes any
703+
* objects contained in the cruft pack.
696704
*/
697705
in = xfdopen(cmd.in, "w");
698706
for_each_string_list_item(item, names)
@@ -710,9 +718,14 @@ static int write_cruft_pack(const struct pack_objects_args *args,
710718
if (line.len != the_hash_algo->hexsz)
711719
die(_("repack: Expecting full hex object ID lines only "
712720
"from pack-objects."));
713-
714-
item = string_list_append(names, line.buf);
715-
item->util = populate_pack_exts(line.buf);
721+
/*
722+
* avoid putting packs written outside of the repository in the
723+
* list of names
724+
*/
725+
if (local) {
726+
item = string_list_append(names, line.buf);
727+
item->util = populate_pack_exts(line.buf);
728+
}
716729
}
717730
fclose(out);
718731

@@ -744,6 +757,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
744757
struct pack_objects_args cruft_po_args = {NULL};
745758
int geometric_factor = 0;
746759
int write_midx = 0;
760+
const char *cruft_expiration = NULL;
761+
const char *expire_to = NULL;
747762

748763
struct option builtin_repack_options[] = {
749764
OPT_BIT('a', NULL, &pack_everything,
@@ -793,6 +808,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
793808
N_("find a geometric progression with factor <N>")),
794809
OPT_BOOL('m', "write-midx", &write_midx,
795810
N_("write a multi-pack index of the resulting packs")),
811+
OPT_STRING(0, "expire-to", &expire_to, N_("dir"),
812+
N_("pack prefix to store a pack containing pruned objects")),
796813
OPT_END()
797814
};
798815

@@ -858,7 +875,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
858875
split_pack_geometry(geometry, geometric_factor);
859876
}
860877

861-
prepare_pack_objects(&cmd, &po_args);
878+
prepare_pack_objects(&cmd, &po_args, packtmp);
862879

863880
show_progress = !po_args.quiet && isatty(2);
864881

@@ -984,11 +1001,45 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
9841001
cruft_po_args.local = po_args.local;
9851002
cruft_po_args.quiet = po_args.quiet;
9861003

987-
ret = write_cruft_pack(&cruft_po_args, pack_prefix, &names,
1004+
ret = write_cruft_pack(&cruft_po_args, packtmp, pack_prefix,
1005+
cruft_expiration, &names,
9881006
&existing_nonkept_packs,
9891007
&existing_kept_packs);
9901008
if (ret)
9911009
return ret;
1010+
1011+
if (delete_redundant && expire_to) {
1012+
/*
1013+
* If `--expire-to` is given with `-d`, it's possible
1014+
* that we're about to prune some objects. With cruft
1015+
* packs, pruning is implicit: any objects from existing
1016+
* packs that weren't picked up by new packs are removed
1017+
* when their packs are deleted.
1018+
*
1019+
* Generate an additional cruft pack, with one twist:
1020+
* `names` now includes the name of the cruft pack
1021+
* written in the previous step. So the contents of
1022+
* _this_ cruft pack exclude everything contained in the
1023+
* existing cruft pack (that is, all of the unreachable
1024+
* objects which are no older than
1025+
* `--cruft-expiration`).
1026+
*
1027+
* To make this work, cruft_expiration must become NULL
1028+
* so that this cruft pack doesn't actually prune any
1029+
* objects. If it were non-NULL, this call would always
1030+
* generate an empty pack (since every object not in the
1031+
* cruft pack generated above will have an mtime older
1032+
* than the expiration).
1033+
*/
1034+
ret = write_cruft_pack(&cruft_po_args, expire_to,
1035+
pack_prefix,
1036+
NULL,
1037+
&names,
1038+
&existing_nonkept_packs,
1039+
&existing_kept_packs);
1040+
if (ret)
1041+
return ret;
1042+
}
9921043
}
9931044

9941045
string_list_sort(&names);

t/t7700-repack.sh

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -543,4 +543,125 @@ test_expect_success '-n overrides repack.updateServerInfo=true' '
543543
test_server_info_missing
544544
'
545545

546+
test_expect_success '--expire-to stores pruned objects (now)' '
547+
git init expire-to-now &&
548+
(
549+
cd expire-to-now &&
550+
551+
git branch -M main &&
552+
553+
test_commit base &&
554+
555+
git checkout -b cruft &&
556+
test_commit --no-tag cruft &&
557+
558+
git rev-list --objects --no-object-names main..cruft >moved.raw &&
559+
sort moved.raw >moved.want &&
560+
561+
git rev-list --all --objects --no-object-names >expect.raw &&
562+
sort expect.raw >expect &&
563+
564+
git checkout main &&
565+
git branch -D cruft &&
566+
git reflog expire --all --expire=all &&
567+
568+
git init --bare expired.git &&
569+
git repack -d \
570+
--cruft --cruft-expiration="now" \
571+
--expire-to="expired.git/objects/pack/pack" &&
572+
573+
expired="$(ls expired.git/objects/pack/pack-*.idx)" &&
574+
test_path_is_file "${expired%.idx}.mtimes" &&
575+
576+
# Since the `--cruft-expiration` is "now", the effective
577+
# behavior is to move _all_ unreachable objects out to
578+
# the location in `--expire-to`.
579+
git show-index <$expired >expired.raw &&
580+
cut -d" " -f2 expired.raw | sort >expired.objects &&
581+
git rev-list --all --objects --no-object-names \
582+
>remaining.objects &&
583+
584+
# ...in other words, the combined contents of this
585+
# repository and expired.git should be the same as the
586+
# set of objects we started with.
587+
cat expired.objects remaining.objects | sort >actual &&
588+
test_cmp expect actual &&
589+
590+
# The "moved" objects (i.e., those in expired.git)
591+
# should be the same as the cruft objects which were
592+
# expired in the previous step.
593+
test_cmp moved.want expired.objects
594+
)
595+
'
596+
597+
test_expect_success '--expire-to stores pruned objects (5.minutes.ago)' '
598+
git init expire-to-5.minutes.ago &&
599+
(
600+
cd expire-to-5.minutes.ago &&
601+
602+
git branch -M main &&
603+
604+
test_commit base &&
605+
606+
# Create two classes of unreachable objects, one which
607+
# is older than 5 minutes (stale), and another which is
608+
# newer (recent).
609+
for kind in stale recent
610+
do
611+
git checkout -b $kind main &&
612+
test_commit --no-tag $kind || return 1
613+
done &&
614+
615+
git rev-list --objects --no-object-names main..stale >in &&
616+
stale="$(git pack-objects $objdir/pack/pack <in)" &&
617+
mtime="$(test-tool chmtime --get =-600 $objdir/pack/pack-$stale.pack)" &&
618+
619+
# expect holds the set of objects we expect to find in
620+
# this repository after repacking
621+
git rev-list --objects --no-object-names recent >expect.raw &&
622+
sort expect.raw >expect &&
623+
624+
# moved.want holds the set of objects we expect to find
625+
# in expired.git
626+
git rev-list --objects --no-object-names main..stale >out &&
627+
sort out >moved.want &&
628+
629+
git checkout main &&
630+
git branch -D stale recent &&
631+
git reflog expire --all --expire=all &&
632+
git prune-packed &&
633+
634+
git init --bare expired.git &&
635+
git repack -d \
636+
--cruft --cruft-expiration=5.minutes.ago \
637+
--expire-to="expired.git/objects/pack/pack" &&
638+
639+
# Some of the remaining objects in this repository are
640+
# unreachable, so use `cat-file --batch-all-objects`
641+
# instead of `rev-list` to get their names
642+
git cat-file --batch-all-objects --batch-check="%(objectname)" \
643+
>remaining.objects &&
644+
sort remaining.objects >actual &&
645+
test_cmp expect actual &&
646+
647+
(
648+
cd expired.git &&
649+
650+
expired="$(ls objects/pack/pack-*.mtimes)" &&
651+
test-tool pack-mtimes $(basename $expired) >out &&
652+
cut -d" " -f1 out | sort >../moved.got &&
653+
654+
# Ensure that there are as many objects with the
655+
# expected mtime as were moved to expired.git.
656+
#
657+
# In other words, ensure that the recorded
658+
# mtimes of any moved objects were written
659+
# correctly.
660+
grep " $mtime$" out >matching &&
661+
test_line_count = $(wc -l <../moved.want) matching
662+
) &&
663+
test_cmp moved.want moved.got
664+
)
665+
'
666+
546667
test_done

0 commit comments

Comments
 (0)