Skip to content

Commit 79861ba

Browse files
committed
Merge branch 'tb/repack-max-cruft-size'
"git repack" learned "--max-cruft-size" to prevent cruft packs from growing without bounds.

* tb/repack-max-cruft-size:
  repack: free existing_cruft array after use
  builtin/repack.c: avoid making cruft packs preferred
  builtin/repack.c: implement support for `--max-cruft-size`
  builtin/repack.c: parse `--max-pack-size` with OPT_MAGNITUDE
  t7700: split cruft-related tests to t7704
2 parents a9ecda2 + c1b754d commit 79861ba

File tree

8 files changed

+645
-136
lines changed

8 files changed

+645
-136
lines changed

Documentation/config/gc.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,12 @@ gc.cruftPacks::
8686
linkgit:git-repack[1]) instead of as loose objects. The default
8787
is `true`.
8888

89+
gc.maxCruftSize::
90+
Limit the size of new cruft packs when repacking. When
91+
specified in addition to `--max-cruft-size`, the command line
92+
option takes priority. See the `--max-cruft-size` option of
93+
linkgit:git-repack[1].
94+
8995
gc.pruneExpire::
9096
When 'git gc' is run, it will call 'prune --expire 2.weeks.ago'
9197
(and 'repack --cruft --cruft-expiration 2.weeks.ago' if using

Documentation/git-gc.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,13 @@ be performed as well.
5959
cruft pack instead of storing them as loose objects. `--cruft`
6060
is on by default.
6161

62+
--max-cruft-size=<n>::
63+
When packing unreachable objects into a cruft pack, limit the
64+
size of new cruft packs to be at most `<n>` bytes. Overrides any
65+
value specified via the `gc.maxCruftSize` configuration. See
66+
the `--max-cruft-size` option of linkgit:git-repack[1] for
67+
more.
68+
6269
--prune=<date>::
6370
Prune loose objects older than date (default is 2 weeks ago,
6471
overridable by the config variable `gc.pruneExpire`).

Documentation/git-repack.txt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,17 @@ to the new separate pack will be written.
7474
immediately instead of waiting for the next `git gc` invocation.
7575
Only useful with `--cruft -d`.
7676

77+
--max-cruft-size=<n>::
78+
Repack cruft objects into packs as large as `<n>` bytes before
79+
creating new packs. As long as there are enough cruft packs
80+
smaller than `<n>`, repacking will cause a new cruft pack to
81+
be created containing objects from any combined cruft packs,
82+
along with any new unreachable objects. Cruft packs larger than
83+
`<n>` will not be modified. When the new cruft pack is larger
84+
than `<n>` bytes, it will be split into multiple packs, all of
85+
which are guaranteed to be at most `<n>` bytes in size. Only
86+
useful with `--cruft -d`.
87+
7788
--expire-to=<dir>::
7889
Write a cruft pack containing pruned objects (if any) to the
7990
directory `<dir>`. This option is useful for keeping a copy of

builtin/gc.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ static const char * const builtin_gc_usage[] = {
5252
static int pack_refs = 1;
5353
static int prune_reflogs = 1;
5454
static int cruft_packs = 1;
55+
static unsigned long max_cruft_size;
5556
static int aggressive_depth = 50;
5657
static int aggressive_window = 250;
5758
static int gc_auto_threshold = 6700;
@@ -165,6 +166,7 @@ static void gc_config(void)
165166
git_config_get_int("gc.autopacklimit", &gc_auto_pack_limit);
166167
git_config_get_bool("gc.autodetach", &detach_auto);
167168
git_config_get_bool("gc.cruftpacks", &cruft_packs);
169+
git_config_get_ulong("gc.maxcruftsize", &max_cruft_size);
168170
git_config_get_expiry("gc.pruneexpire", &prune_expire);
169171
git_config_get_expiry("gc.worktreepruneexpire", &prune_worktrees_expire);
170172
git_config_get_expiry("gc.logexpiry", &gc_log_expire);
@@ -352,6 +354,9 @@ static void add_repack_all_option(struct string_list *keep_pack)
352354
strvec_push(&repack, "--cruft");
353355
if (prune_expire)
354356
strvec_pushf(&repack, "--cruft-expiration=%s", prune_expire);
357+
if (max_cruft_size)
358+
strvec_pushf(&repack, "--max-cruft-size=%lu",
359+
max_cruft_size);
355360
} else {
356361
strvec_push(&repack, "-A");
357362
if (prune_expire)
@@ -585,6 +590,8 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
585590
N_("prune unreferenced objects"),
586591
PARSE_OPT_OPTARG, NULL, (intptr_t)prune_expire },
587592
OPT_BOOL(0, "cruft", &cruft_packs, N_("pack unreferenced objects separately")),
593+
OPT_MAGNITUDE(0, "max-cruft-size", &max_cruft_size,
594+
N_("with --cruft, limit the size of new cruft packs")),
588595
OPT_BOOL(0, "aggressive", &aggressive, N_("be more thorough (increased runtime)")),
589596
OPT_BOOL_F(0, "auto", &auto_gc, N_("enable auto-gc mode"),
590597
PARSE_OPT_NOCOMPLETE),

builtin/repack.c

Lines changed: 173 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#define PACK_CRUFT 4
2929

3030
#define DELETE_PACK 1
31+
#define RETAIN_PACK 2
3132

3233
static int pack_everything;
3334
static int delta_base_offset = 1;
@@ -52,7 +53,7 @@ struct pack_objects_args {
5253
const char *window_memory;
5354
const char *depth;
5455
const char *threads;
55-
const char *max_pack_size;
56+
unsigned long max_pack_size;
5657
int no_reuse_delta;
5758
int no_reuse_object;
5859
int quiet;
@@ -118,11 +119,26 @@ static void pack_mark_for_deletion(struct string_list_item *item)
118119
item->util = (void*)((uintptr_t)item->util | DELETE_PACK);
119120
}
120121

122+
/*
 * Clear the DELETE_PACK flag on `item`.
 *
 * The flags live in the pointer-sized `item->util` field, so only the
 * DELETE_PACK bit is masked off; any other bit stored there (such as
 * RETAIN_PACK) is left as it was.
 */
static void pack_unmark_for_deletion(struct string_list_item *item)
123+
{
124+
item->util = (void*)((uintptr_t)item->util & ~DELETE_PACK);
125+
}
126+
121127
/*
 * Test whether the DELETE_PACK flag is set on `item`.
 *
 * Returns the masked bit itself: non-zero when the flag is set in
 * `item->util`, and 0 otherwise.
 */
static int pack_is_marked_for_deletion(struct string_list_item *item)
122128
{
123129
return (uintptr_t)item->util & DELETE_PACK;
124130
}
125131

132+
/*
 * Set the RETAIN_PACK flag on `item`.
 *
 * The bit is OR-ed into `item->util`, so flags that are already set
 * there are preserved.
 */
static void pack_mark_retained(struct string_list_item *item)
133+
{
134+
item->util = (void*)((uintptr_t)item->util | RETAIN_PACK);
135+
}
136+
137+
/*
 * Test whether the RETAIN_PACK flag is set on `item`.
 *
 * Returns the masked bit itself: non-zero when the flag is set in
 * `item->util`, and 0 otherwise.
 */
static int pack_is_retained(struct string_list_item *item)
138+
{
139+
return (uintptr_t)item->util & RETAIN_PACK;
140+
}
141+
126142
static void mark_packs_for_deletion_1(struct string_list *names,
127143
struct string_list *list)
128144
{
@@ -135,17 +151,39 @@ static void mark_packs_for_deletion_1(struct string_list *names,
135151
if (len < hexsz)
136152
continue;
137153
sha1 = item->string + len - hexsz;
138-
/*
139-
* Mark this pack for deletion, which ensures that this
140-
* pack won't be included in a MIDX (if `--write-midx`
141-
* was given) and that we will actually delete this pack
142-
* (if `-d` was given).
143-
*/
144-
if (!string_list_has_string(names, sha1))
154+
155+
if (pack_is_retained(item)) {
156+
pack_unmark_for_deletion(item);
157+
} else if (!string_list_has_string(names, sha1)) {
158+
/*
159+
* Mark this pack for deletion, which ensures
160+
* that this pack won't be included in a MIDX
161+
* (if `--write-midx` was given) and that we
162+
* will actually delete this pack (if `-d` was
163+
* given).
164+
*/
145165
pack_mark_for_deletion(item);
166+
}
146167
}
147168
}
148169

170+
/*
 * Flag the entry for `cruft` in `existing->cruft_packs` as retained.
 *
 * The list is searched by the pack's basename with its ".pack" suffix
 * removed. A pack that is not in the list is treated as a programming
 * error and reported through BUG().
 */
static void retain_cruft_pack(struct existing_packs *existing,
171+
struct packed_git *cruft)
172+
{
173+
struct strbuf buf = STRBUF_INIT;
174+
struct string_list_item *item;
175+
176+
strbuf_addstr(&buf, pack_basename(cruft));
177+
strbuf_strip_suffix(&buf, ".pack");
178+
179+
item = string_list_lookup(&existing->cruft_packs, buf.buf);
180+
if (!item)
181+
BUG("could not find cruft pack '%s'", pack_basename(cruft));
182+
183+
pack_mark_retained(item);
184+
strbuf_release(&buf);
185+
}
186+
149187
static void mark_packs_for_deletion(struct existing_packs *existing,
150188
struct string_list *names)
151189

@@ -227,6 +265,8 @@ static void collect_pack_filenames(struct existing_packs *existing,
227265
}
228266

229267
string_list_sort(&existing->kept_packs);
268+
string_list_sort(&existing->non_kept_packs);
269+
string_list_sort(&existing->cruft_packs);
230270
strbuf_release(&buf);
231271
}
232272

@@ -244,7 +284,7 @@ static void prepare_pack_objects(struct child_process *cmd,
244284
if (args->threads)
245285
strvec_pushf(&cmd->args, "--threads=%s", args->threads);
246286
if (args->max_pack_size)
247-
strvec_pushf(&cmd->args, "--max-pack-size=%s", args->max_pack_size);
287+
strvec_pushf(&cmd->args, "--max-pack-size=%lu", args->max_pack_size);
248288
if (args->no_reuse_delta)
249289
strvec_pushf(&cmd->args, "--no-reuse-delta");
250290
if (args->no_reuse_object)
@@ -317,6 +357,18 @@ static struct generated_pack_data *populate_pack_exts(const char *name)
317357
return data;
318358
}
319359

360+
/*
 * Report whether the generated pack described by `data` has a file
 * with extension `ext` (for example ".mtimes").
 *
 * `ext` is matched by exact string comparison against the names in the
 * `exts` table. The result is 1 when the `data->tempfiles` slot at the
 * matching index is non-NULL and 0 when it is NULL. An `ext` that is
 * not in the table is a programming error and is reported through
 * BUG().
 */
static int has_pack_ext(const struct generated_pack_data *data,
361+
const char *ext)
362+
{
363+
int i;
364+
for (i = 0; i < ARRAY_SIZE(exts); i++) {
365+
if (strcmp(exts[i].name, ext))
366+
continue;
367+
return !!data->tempfiles[i];
368+
}
369+
BUG("unknown pack extension: '%s'", ext);
370+
}
371+
320372
static void repack_promisor_objects(const struct pack_objects_args *args,
321373
struct string_list *names)
322374
{
@@ -734,6 +786,7 @@ static void midx_included_packs(struct string_list *include,
734786

735787
static int write_midx_included_packs(struct string_list *include,
736788
struct pack_geometry *geometry,
789+
struct string_list *names,
737790
const char *refs_snapshot,
738791
int show_progress, int write_bitmaps)
739792
{
@@ -763,6 +816,38 @@ static int write_midx_included_packs(struct string_list *include,
763816
if (preferred)
764817
strvec_pushf(&cmd.args, "--preferred-pack=%s",
765818
pack_basename(preferred));
819+
else if (names->nr) {
820+
/* The largest pack was repacked, meaning that either
821+
* one or two packs exist depending on whether the
822+
* repository has a cruft pack or not.
823+
*
824+
* Select the non-cruft one as preferred to encourage
825+
* pack-reuse among packs containing reachable objects
826+
* over unreachable ones.
827+
*
828+
* (Note we could write multiple packs here if
829+
* `--max-pack-size` was given, but any one of them
830+
* will suffice, so pick the first one.)
831+
*/
832+
for_each_string_list_item(item, names) {
833+
struct generated_pack_data *data = item->util;
834+
if (has_pack_ext(data, ".mtimes"))
835+
continue;
836+
837+
strvec_pushf(&cmd.args, "--preferred-pack=pack-%s.pack",
838+
item->string);
839+
break;
840+
}
841+
} else {
842+
/*
843+
* No packs were kept, and no packs were written. The
844+
* only thing remaining are .keep packs (unless
845+
* --pack-kept-objects was given).
846+
*
847+
* Set the `--preferred-pack` arbitrarily here.
848+
*/
849+
;
850+
}
766851

767852
if (refs_snapshot)
768853
strvec_pushf(&cmd.args, "--refs-snapshot=%s", refs_snapshot);
@@ -888,6 +973,73 @@ static int write_filtered_pack(const struct pack_objects_args *args,
888973
return finish_pack_objects_cmd(&cmd, names, local);
889974
}
890975

976+
/*
 * Sort comparator that orders packs by ascending `pack_size`.
 *
 * `va` and `vb` each point at an array element of type
 * `struct packed_git *`. Returns -1, 0 or 1 when the first pack is
 * smaller than, equal in size to, or larger than the second.
 */
static int existing_cruft_pack_cmp(const void *va, const void *vb)
977+
{
978+
struct packed_git *a = *(struct packed_git **)va;
979+
struct packed_git *b = *(struct packed_git **)vb;
980+
981+
if (a->pack_size < b->pack_size)
982+
return -1;
983+
if (a->pack_size > b->pack_size)
984+
return 1;
985+
return 0;
986+
}
987+
988+
/*
 * Write the list of existing cruft and non-kept packs to `in`, choosing
 * which cruft packs to combine so that their total size stays within
 * `max_size` bytes.
 *
 *   in       - stream that receives one pack name per line
 *   max_size - upper bound on the summed `pack_size` of the cruft packs
 *              written with a leading '-'
 *   existing - supplies the `cruft_packs` and `non_kept_packs` lists;
 *              cruft packs that do not fit are marked retained in it
 *
 * Cruft packs are visited from smallest to largest. Each one that keeps
 * the running total at or below `max_size` is written as "-<basename>".
 * Each one that would push the total past `max_size` is marked retained
 * via retain_cruft_pack() and written as "<basename>" with no prefix.
 * Every entry of `existing->non_kept_packs` is then written as
 * "-<name>.pack".
 *
 * NOTE(review): the '-' prefix presumably follows the stdin convention
 * of `git pack-objects --cruft`, where it marks a pack that is going
 * away -- confirm against git-pack-objects(1).
 */
static void collapse_small_cruft_packs(FILE *in, size_t max_size,
989+
struct existing_packs *existing)
990+
{
991+
struct packed_git **existing_cruft, *p;
992+
struct strbuf buf = STRBUF_INIT;
993+
size_t total_size = 0;
994+
size_t existing_cruft_nr = 0;
995+
size_t i;
996+
997+
ALLOC_ARRAY(existing_cruft, existing->cruft_packs.nr);
998+
/*
 * Collect the local cruft packs whose basename (without ".pack") is
 * also listed in existing->cruft_packs.
 */
999+
for (p = get_all_packs(the_repository); p; p = p->next) {
1000+
if (!(p->is_cruft && p->pack_local))
1001+
continue;
1002+
1003+
strbuf_reset(&buf);
1004+
strbuf_addstr(&buf, pack_basename(p));
1005+
strbuf_strip_suffix(&buf, ".pack");
1006+
1007+
if (!string_list_has_string(&existing->cruft_packs, buf.buf))
1008+
continue;
1009+
/* The array was sized from cruft_packs.nr; finding more is a bug. */
1010+
if (existing_cruft_nr >= existing->cruft_packs.nr)
1011+
BUG("too many cruft packs (found %"PRIuMAX", but knew "
1012+
"of %"PRIuMAX")",
1013+
(uintmax_t)existing_cruft_nr + 1,
1014+
(uintmax_t)existing->cruft_packs.nr);
1015+
existing_cruft[existing_cruft_nr++] = p;
1016+
}
1017+
/* Smallest packs first, so they are the first to be combined. */
1018+
QSORT(existing_cruft, existing_cruft_nr, existing_cruft_pack_cmp);
1019+
/*
 * Add packs to the running total while it stays within max_size; a
 * pack that would exceed the limit is retained and does not count
 * toward the total.
 */
1020+
for (i = 0; i < existing_cruft_nr; i++) {
1021+
size_t proposed;
1022+
1023+
p = existing_cruft[i];
/* st_add() is presumably an overflow-checked addition -- TODO confirm. */
1024+
proposed = st_add(total_size, p->pack_size);
1025+
1026+
if (proposed <= max_size) {
1027+
total_size = proposed;
1028+
fprintf(in, "-%s\n", pack_basename(p));
1029+
} else {
1030+
retain_cruft_pack(existing, p);
1031+
fprintf(in, "%s\n", pack_basename(p));
1032+
}
1033+
}
1034+
/* Non-kept packs are always written with the '-' prefix. */
1035+
for (i = 0; i < existing->non_kept_packs.nr; i++)
1036+
fprintf(in, "-%s.pack\n",
1037+
existing->non_kept_packs.items[i].string);
1038+
1039+
strbuf_release(&buf);
1040+
free(existing_cruft);
1041+
}
1042+
8911043
static int write_cruft_pack(const struct pack_objects_args *args,
8921044
const char *destination,
8931045
const char *pack_prefix,
@@ -934,10 +1086,14 @@ static int write_cruft_pack(const struct pack_objects_args *args,
9341086
in = xfdopen(cmd.in, "w");
9351087
for_each_string_list_item(item, names)
9361088
fprintf(in, "%s-%s.pack\n", pack_prefix, item->string);
937-
for_each_string_list_item(item, &existing->non_kept_packs)
938-
fprintf(in, "-%s.pack\n", item->string);
939-
for_each_string_list_item(item, &existing->cruft_packs)
940-
fprintf(in, "-%s.pack\n", item->string);
1089+
if (args->max_pack_size && !cruft_expiration) {
1090+
collapse_small_cruft_packs(in, args->max_pack_size, existing);
1091+
} else {
1092+
for_each_string_list_item(item, &existing->non_kept_packs)
1093+
fprintf(in, "-%s.pack\n", item->string);
1094+
for_each_string_list_item(item, &existing->cruft_packs)
1095+
fprintf(in, "-%s.pack\n", item->string);
1096+
}
9411097
for_each_string_list_item(item, &existing->kept_packs)
9421098
fprintf(in, "%s.pack\n", item->string);
9431099
fclose(in);
@@ -990,6 +1146,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
9901146
PACK_CRUFT),
9911147
OPT_STRING(0, "cruft-expiration", &cruft_expiration, N_("approxidate"),
9921148
N_("with --cruft, expire objects older than this")),
1149+
OPT_MAGNITUDE(0, "max-cruft-size", &cruft_po_args.max_pack_size,
1150+
N_("with --cruft, limit the size of new cruft packs")),
9931151
OPT_BOOL('d', NULL, &delete_redundant,
9941152
N_("remove redundant packs, and run git-prune-packed")),
9951153
OPT_BOOL('f', NULL, &po_args.no_reuse_delta,
@@ -1017,7 +1175,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
10171175
N_("limits the maximum delta depth")),
10181176
OPT_STRING(0, "threads", &po_args.threads, N_("n"),
10191177
N_("limits the maximum number of threads")),
1020-
OPT_STRING(0, "max-pack-size", &po_args.max_pack_size, N_("bytes"),
1178+
OPT_MAGNITUDE(0, "max-pack-size", &po_args.max_pack_size,
10211179
N_("maximum size of each packfile")),
10221180
OPT_PARSE_LIST_OBJECTS_FILTER(&po_args.filter_options),
10231181
OPT_BOOL(0, "pack-kept-objects", &pack_kept_objects,
@@ -1327,7 +1485,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
13271485
struct string_list include = STRING_LIST_INIT_NODUP;
13281486
midx_included_packs(&include, &existing, &names, &geometry);
13291487

1330-
ret = write_midx_included_packs(&include, &geometry,
1488+
ret = write_midx_included_packs(&include, &geometry, &names,
13311489
refs_snapshot ? get_tempfile_path(refs_snapshot) : NULL,
13321490
show_progress, write_bitmaps > 0);
13331491

0 commit comments

Comments
 (0)