Skip to content

Commit 484d7ad

Browse files
ttaylorrgitster
authored andcommitted
repack: begin combining cruft packs with --combine-cruft-below-size
The previous commit changed the behavior of repack's '--max-cruft-size' to specify a cruft pack-specific override for '--max-pack-size'. Introduce a new flag, '--combine-cruft-below-size' which is a replacement for the old behavior of '--max-cruft-size'. This new flag does explicitly what it says: it combines together cruft packs which are smaller than a given threshold, and leaves alone ones which are larger. This accomplishes the original intent of '--max-cruft-size', which was to avoid repacking cruft packs larger than the given threshold. The new behavior is slightly different. Instead of building up small packs together until the threshold is met, '--combine-cruft-below-size' packs up *all* cruft packs smaller than the threshold. This means that we may make a pack much larger than the given threshold (e.g., if you aggregate 5 packs which are each 99 MiB in size with a threshold of 100 MiB). But that's OK: the point isn't to restrict the size of the cruft packs we generate, it's to avoid working with ones that have already grown too large. If repositories still want to limit the size of the generated cruft pack(s), they may use '--max-cruft-size'. There's some minor test fallout as a result of the slight differences in behavior between the old meaning of '--max-cruft-size' and the behavior of '--combine-cruft-below-size'. In the test which is now called "--combine-cruft-below-size combines packs", we need to use the new flag over the old one to exercise that test's intended behavior. The remainder of the changes there are to improve the clarity of the comments. Suggested-by: Elijah Newren <[email protected]> Signed-off-by: Taylor Blau <[email protected]> Acked-by: Elijah Newren <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 0855ed9 commit 484d7ad

File tree

3 files changed

+47
-22
lines changed

3 files changed

+47
-22
lines changed

Documentation/git-repack.adoc

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,15 @@ to the new separate pack will be written.
8181
`--max-pack-size` (if any) by default. See the documentation for
8282
`--max-pack-size` for more details.
8383

84+
--combine-cruft-below-size=<n>::
85+
When generating cruft packs without pruning, only repack
86+
existing cruft packs whose size is strictly less than `<n>`,
87+
where `<n>` represents a number of bytes, which can optionally
88+
be suffixed with "k", "m", or "g". Cruft packs whose size is
89+
greater than or equal to `<n>` are left as-is and not repacked.
90+
Useful when you want to avoid repacking large cruft pack(s) in
91+
repositories that have many and/or large unreachable objects.
92+
8493
--expire-to=<dir>::
8594
Write a cruft pack containing pruned objects (if any) to the
8695
directory `<dir>`. This option is useful for keeping a copy of

builtin/repack.c

Lines changed: 25 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1022,20 +1022,13 @@ static int write_filtered_pack(const struct pack_objects_args *args,
10221022
return finish_pack_objects_cmd(&cmd, names, local);
10231023
}
10241024

1025-
static void collapse_small_cruft_packs(FILE *in, size_t max_size UNUSED,
1026-
struct existing_packs *existing)
1025+
static void combine_small_cruft_packs(FILE *in, size_t combine_cruft_below_size,
1026+
struct existing_packs *existing)
10271027
{
10281028
struct packed_git *p;
10291029
struct strbuf buf = STRBUF_INIT;
10301030
size_t i;
10311031

1032-
/*
1033-
* Squelch a -Wunused-function warning while we rationalize
1034-
* the behavior of --max-cruft-size. This function will become
1035-
* used again in a future commit.
1036-
*/
1037-
(void)retain_cruft_pack;
1038-
10391032
for (p = get_all_packs(the_repository); p; p = p->next) {
10401033
if (!(p->is_cruft && p->pack_local))
10411034
continue;
@@ -1047,7 +1040,12 @@ static void collapse_small_cruft_packs(FILE *in, size_t max_size UNUSED,
10471040
if (!string_list_has_string(&existing->cruft_packs, buf.buf))
10481041
continue;
10491042

1050-
fprintf(in, "-%s.pack\n", buf.buf);
1043+
if (p->pack_size < combine_cruft_below_size) {
1044+
fprintf(in, "-%s\n", pack_basename(p));
1045+
} else {
1046+
retain_cruft_pack(existing, p);
1047+
fprintf(in, "%s\n", pack_basename(p));
1048+
}
10511049
}
10521050

10531051
for (i = 0; i < existing->non_kept_packs.nr; i++)
@@ -1061,6 +1059,7 @@ static int write_cruft_pack(const struct pack_objects_args *args,
10611059
const char *destination,
10621060
const char *pack_prefix,
10631061
const char *cruft_expiration,
1062+
unsigned long combine_cruft_below_size,
10641063
struct string_list *names,
10651064
struct existing_packs *existing)
10661065
{
@@ -1103,8 +1102,9 @@ static int write_cruft_pack(const struct pack_objects_args *args,
11031102
in = xfdopen(cmd.in, "w");
11041103
for_each_string_list_item(item, names)
11051104
fprintf(in, "%s-%s.pack\n", pack_prefix, item->string);
1106-
if (args->max_pack_size && !cruft_expiration) {
1107-
collapse_small_cruft_packs(in, args->max_pack_size, existing);
1105+
if (combine_cruft_below_size && !cruft_expiration) {
1106+
combine_small_cruft_packs(in, combine_cruft_below_size,
1107+
existing);
11081108
} else {
11091109
for_each_string_list_item(item, &existing->non_kept_packs)
11101110
fprintf(in, "-%s.pack\n", item->string);
@@ -1158,6 +1158,7 @@ int cmd_repack(int argc,
11581158
const char *opt_window_memory = NULL;
11591159
const char *opt_depth = NULL;
11601160
const char *opt_threads = NULL;
1161+
unsigned long combine_cruft_below_size = 0ul;
11611162

11621163
struct option builtin_repack_options[] = {
11631164
OPT_BIT('a', NULL, &pack_everything,
@@ -1170,6 +1171,9 @@ int cmd_repack(int argc,
11701171
PACK_CRUFT),
11711172
OPT_STRING(0, "cruft-expiration", &cruft_expiration, N_("approxidate"),
11721173
N_("with --cruft, expire objects older than this")),
1174+
OPT_MAGNITUDE(0, "combine-cruft-below-size",
1175+
&combine_cruft_below_size,
1176+
N_("with --cruft, only repack cruft packs smaller than this")),
11731177
OPT_MAGNITUDE(0, "max-cruft-size", &cruft_po_args.max_pack_size,
11741178
N_("with --cruft, limit the size of new cruft packs")),
11751179
OPT_BOOL('d', NULL, &delete_redundant,
@@ -1413,7 +1417,8 @@ int cmd_repack(int argc,
14131417
cruft_po_args.quiet = po_args.quiet;
14141418

14151419
ret = write_cruft_pack(&cruft_po_args, packtmp, pack_prefix,
1416-
cruft_expiration, &names,
1420+
cruft_expiration,
1421+
combine_cruft_below_size, &names,
14171422
&existing);
14181423
if (ret)
14191424
goto cleanup;
@@ -1440,10 +1445,17 @@ int cmd_repack(int argc,
14401445
* generate an empty pack (since every object not in the
14411446
* cruft pack generated above will have an mtime older
14421447
* than the expiration).
1448+
*
1449+
* Pretend we don't have a `--combine-cruft-below-size`
1450+
* argument, since we're not selectively combining
1451+
* anything based on size to generate the limbo cruft
1452+
* pack, but rather removing all cruft packs from the
1453+
* main repository regardless of size.
14431454
*/
14441455
ret = write_cruft_pack(&cruft_po_args, expire_to,
14451456
pack_prefix,
14461457
NULL,
1458+
0ul,
14471459
&names,
14481460
&existing);
14491461
if (ret)

t/t7704-repack-cruft.sh

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -194,10 +194,13 @@ test_expect_success '--max-cruft-size combines existing packs when not too large
194194
)
195195
'
196196

197-
test_expect_failure '--max-cruft-size combines smaller packs first' '
198-
git init max-cruft-size-consume-small &&
197+
test_expect_success '--combine-cruft-below-size combines packs' '
198+
repo=combine-cruft-below-size &&
199+
test_when_finished "rm -fr $repo" &&
200+
201+
git init "$repo" &&
199202
(
200-
cd max-cruft-size-consume-small &&
203+
cd "$repo" &&
201204
202205
test_commit base &&
203206
git repack -ad &&
@@ -211,11 +214,11 @@ test_expect_failure '--max-cruft-size combines smaller packs first' '
211214
test-tool pack-mtimes "$(basename $cruft_bar)" >>expect.raw &&
212215
sort expect.raw >expect.objects &&
213216
214-
# repacking with `--max-cruft-size=2M` should combine
215-
# both 0.5 MiB packs together, instead of, say, one of
216-
# the 0.5 MiB packs with the 1.0 MiB pack
217+
# Repacking with `--combine-cruft-below-size=1M`
218+
# should combine both 0.5 MiB packs together, but
219+
# ignore the two packs which are >= 1.0 MiB.
217220
ls $packdir/pack-*.mtimes | sort >cruft.before &&
218-
git repack -d --cruft --max-cruft-size=2M &&
221+
git repack -d --cruft --combine-cruft-below-size=1M &&
219222
ls $packdir/pack-*.mtimes | sort >cruft.after &&
220223
221224
comm -13 cruft.before cruft.after >cruft.new &&
@@ -224,11 +227,12 @@ test_expect_failure '--max-cruft-size combines smaller packs first' '
224227
test_line_count = 1 cruft.new &&
225228
test_line_count = 2 cruft.removed &&
226229
227-
# the two smaller packs should be rolled up first
230+
# The two packs smaller than 1.0MiB should be repacked
231+
# together.
228232
printf "%s\n" $cruft_foo $cruft_bar | sort >expect.removed &&
229233
test_cmp expect.removed cruft.removed &&
230234
231-
# ...and contain the set of objects rolled up
235+
# ...and contain the set of objects rolled up.
232236
test-tool pack-mtimes "$(basename $(cat cruft.new))" >actual.raw &&
233237
sort actual.raw >actual.objects &&
234238

0 commit comments

Comments
 (0)