Skip to content

Commit 1fdedb7

Browse files
committed
Merge branch 'cc/repack-sift-filtered-objects-to-separate-pack'
"git repack" machinery learns to pay attention to the "--filter=" option. * cc/repack-sift-filtered-objects-to-separate-pack: gc: add `gc.repackFilterTo` config option repack: implement `--filter-to` for storing filtered out objects gc: add `gc.repackFilter` config option repack: add `--filter=<filter-spec>` option pack-bitmap-write: rebuild using new bitmap when remapping repack: refactor finding pack prefix repack: refactor finishing pack-objects command t/helper: add 'find-pack' test-tool pack-objects: allow `--filter` without `--stdout`
2 parents afb0d08 + 9b96046 commit 1fdedb7

15 files changed

+544
-51
lines changed

Documentation/config/gc.txt

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,22 @@ Multiple hooks are supported, but all must exit successfully, else the
145145
operation (either generating a cruft pack or unpacking unreachable
146146
objects) will be halted.
147147

148+
gc.repackFilter::
149+
When repacking, use the specified filter to move certain
150+
objects into a separate packfile. See the
151+
`--filter=<filter-spec>` option of linkgit:git-repack[1].
152+
153+
gc.repackFilterTo::
154+
When repacking and using a filter (see `gc.repackFilter`), the
155+
specified location will be used to create the packfile
156+
containing the filtered out objects. **WARNING:** The
157+
specified location should be accessible, using for example the
158+
Git alternates mechanism, otherwise the repo could be
159+
considered corrupt by Git as it might not be able to access the
160+
objects in that packfile. See the `--filter-to=<dir>` option
161+
of linkgit:git-repack[1] and the `objects/info/alternates`
162+
section of linkgit:gitrepository-layout[5].
163+
148164
gc.rerereResolved::
149165
Records of conflicted merge you resolved earlier are
150166
kept for this many days when 'git rerere gc' is run.

Documentation/git-pack-objects.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -296,8 +296,8 @@ So does `git bundle` (see linkgit:git-bundle[1]) when it creates a bundle.
296296
nevertheless.
297297

298298
--filter=<filter-spec>::
299-
Requires `--stdout`. Omits certain objects (usually blobs) from
300-
the resulting packfile. See linkgit:git-rev-list[1] for valid
299+
Omits certain objects (usually blobs) from the resulting
300+
packfile. See linkgit:git-rev-list[1] for valid
301301
`<filter-spec>` forms.
302302

303303
--no-filter::

Documentation/git-repack.txt

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,29 @@ depth is 4095.
143143
a larger and slower repository; see the discussion in
144144
`pack.packSizeLimit`.
145145

146+
--filter=<filter-spec>::
147+
Remove objects matching the filter specification from the
148+
resulting packfile and put them into a separate packfile. Note
149+
that objects used in the working directory are not filtered
150+
out. So for the split to fully work, it's best to perform it
151+
in a bare repo and to use the `-a` and `-d` options along with
152+
this option. Also `--no-write-bitmap-index` (or the
153+
`repack.writebitmaps` config option set to `false`) should be
154+
used, otherwise writing the bitmap index will fail, as it assumes
155+
a single packfile containing all the objects. See
156+
linkgit:git-rev-list[1] for valid `<filter-spec>` forms.
157+
158+
--filter-to=<dir>::
159+
Write the pack containing filtered out objects to the
160+
directory `<dir>`. Only useful with `--filter`. This can be
161+
used for putting the pack on a separate object directory that
162+
is accessed through the Git alternates mechanism. **WARNING:**
163+
If the packfile containing the filtered out objects is not
164+
accessible, the repo can become corrupt as it might not be
165+
possible to access the objects in that packfile. See the
166+
`objects` and `objects/info/alternates` sections of
167+
linkgit:gitrepository-layout[5].
168+
146169
-b::
147170
--write-bitmap-index::
148171
Write a reachability bitmap index as part of the repack. This

Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -800,6 +800,7 @@ TEST_BUILTINS_OBJS += test-dump-untracked-cache.o
800800
TEST_BUILTINS_OBJS += test-env-helper.o
801801
TEST_BUILTINS_OBJS += test-example-decorate.o
802802
TEST_BUILTINS_OBJS += test-fast-rebase.o
803+
TEST_BUILTINS_OBJS += test-find-pack.o
803804
TEST_BUILTINS_OBJS += test-fsmonitor-client.o
804805
TEST_BUILTINS_OBJS += test-genrandom.o
805806
TEST_BUILTINS_OBJS += test-genzeros.o

builtin/gc.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ static timestamp_t gc_log_expire_time;
6161
static const char *gc_log_expire = "1.day.ago";
6262
static const char *prune_expire = "2.weeks.ago";
6363
static const char *prune_worktrees_expire = "3.months.ago";
64+
static char *repack_filter;
65+
static char *repack_filter_to;
6466
static unsigned long big_pack_threshold;
6567
static unsigned long max_delta_cache_size = DEFAULT_DELTA_CACHE_SIZE;
6668

@@ -170,6 +172,9 @@ static void gc_config(void)
170172
git_config_get_ulong("gc.bigpackthreshold", &big_pack_threshold);
171173
git_config_get_ulong("pack.deltacachesize", &max_delta_cache_size);
172174

175+
git_config_get_string("gc.repackfilter", &repack_filter);
176+
git_config_get_string("gc.repackfilterto", &repack_filter_to);
177+
173178
git_config(git_default_config, NULL);
174179
}
175180

@@ -355,6 +360,11 @@ static void add_repack_all_option(struct string_list *keep_pack)
355360

356361
if (keep_pack)
357362
for_each_string_list(keep_pack, keep_one_pack, NULL);
363+
364+
if (repack_filter && *repack_filter)
365+
strvec_pushf(&repack, "--filter=%s", repack_filter);
366+
if (repack_filter_to && *repack_filter_to)
367+
strvec_pushf(&repack, "--filter-to=%s", repack_filter_to);
358368
}
359369

360370
static void add_repack_incremental_option(void)

builtin/pack-objects.c

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4402,12 +4402,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
44024402
if (!rev_list_all || !rev_list_reflog || !rev_list_index)
44034403
unpack_unreachable_expiration = 0;
44044404

4405-
if (filter_options.choice) {
4406-
if (!pack_to_stdout)
4407-
die(_("cannot use --filter without --stdout"));
4408-
if (stdin_packs)
4409-
die(_("cannot use --filter with --stdin-packs"));
4410-
}
4405+
if (stdin_packs && filter_options.choice)
4406+
die(_("cannot use --filter with --stdin-packs"));
44114407

44124408
if (stdin_packs && use_internal_rev_list)
44134409
die(_("cannot use internal rev list with --stdin-packs"));

builtin/repack.c

Lines changed: 122 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "pack.h"
2222
#include "pack-bitmap.h"
2323
#include "refs.h"
24+
#include "list-objects-filter-options.h"
2425

2526
#define ALL_INTO_ONE 1
2627
#define LOOSEN_UNREACHABLE 2
@@ -56,6 +57,7 @@ struct pack_objects_args {
5657
int no_reuse_object;
5758
int quiet;
5859
int local;
60+
struct list_objects_filter_options filter_options;
5961
};
6062

6163
static int repack_config(const char *var, const char *value,
@@ -806,6 +808,86 @@ static void remove_redundant_bitmaps(struct string_list *include,
806808
strbuf_release(&path);
807809
}
808810

811+
static int finish_pack_objects_cmd(struct child_process *cmd,
812+
struct string_list *names,
813+
int local)
814+
{
815+
FILE *out;
816+
struct strbuf line = STRBUF_INIT;
817+
818+
out = xfdopen(cmd->out, "r");
819+
while (strbuf_getline_lf(&line, out) != EOF) {
820+
struct string_list_item *item;
821+
822+
if (line.len != the_hash_algo->hexsz)
823+
die(_("repack: Expecting full hex object ID lines only "
824+
"from pack-objects."));
825+
/*
826+
* Avoid putting packs written outside of the repository in the
827+
* list of names.
828+
*/
829+
if (local) {
830+
item = string_list_append(names, line.buf);
831+
item->util = populate_pack_exts(line.buf);
832+
}
833+
}
834+
fclose(out);
835+
836+
strbuf_release(&line);
837+
838+
return finish_command(cmd);
839+
}
840+
841+
static int write_filtered_pack(const struct pack_objects_args *args,
842+
const char *destination,
843+
const char *pack_prefix,
844+
struct existing_packs *existing,
845+
struct string_list *names)
846+
{
847+
struct child_process cmd = CHILD_PROCESS_INIT;
848+
struct string_list_item *item;
849+
FILE *in;
850+
int ret;
851+
const char *caret;
852+
const char *scratch;
853+
int local = skip_prefix(destination, packdir, &scratch);
854+
855+
prepare_pack_objects(&cmd, args, destination);
856+
857+
strvec_push(&cmd.args, "--stdin-packs");
858+
859+
if (!pack_kept_objects)
860+
strvec_push(&cmd.args, "--honor-pack-keep");
861+
for_each_string_list_item(item, &existing->kept_packs)
862+
strvec_pushf(&cmd.args, "--keep-pack=%s", item->string);
863+
864+
cmd.in = -1;
865+
866+
ret = start_command(&cmd);
867+
if (ret)
868+
return ret;
869+
870+
/*
871+
* Here 'names' contains only the pack(s) that were just
872+
* written, which is exactly the packs we want to keep. Also
873+
* 'existing->kept_packs' already contains the packs in
874+
* 'keep_pack_list'.
875+
*/
876+
in = xfdopen(cmd.in, "w");
877+
for_each_string_list_item(item, names)
878+
fprintf(in, "^%s-%s.pack\n", pack_prefix, item->string);
879+
for_each_string_list_item(item, &existing->non_kept_packs)
880+
fprintf(in, "%s.pack\n", item->string);
881+
for_each_string_list_item(item, &existing->cruft_packs)
882+
fprintf(in, "%s.pack\n", item->string);
883+
caret = pack_kept_objects ? "" : "^";
884+
for_each_string_list_item(item, &existing->kept_packs)
885+
fprintf(in, "%s%s.pack\n", caret, item->string);
886+
fclose(in);
887+
888+
return finish_pack_objects_cmd(&cmd, names, local);
889+
}
890+
809891
static int write_cruft_pack(const struct pack_objects_args *args,
810892
const char *destination,
811893
const char *pack_prefix,
@@ -814,9 +896,8 @@ static int write_cruft_pack(const struct pack_objects_args *args,
814896
struct existing_packs *existing)
815897
{
816898
struct child_process cmd = CHILD_PROCESS_INIT;
817-
struct strbuf line = STRBUF_INIT;
818899
struct string_list_item *item;
819-
FILE *in, *out;
900+
FILE *in;
820901
int ret;
821902
const char *scratch;
822903
int local = skip_prefix(destination, packdir, &scratch);
@@ -861,27 +942,18 @@ static int write_cruft_pack(const struct pack_objects_args *args,
861942
fprintf(in, "%s.pack\n", item->string);
862943
fclose(in);
863944

864-
out = xfdopen(cmd.out, "r");
865-
while (strbuf_getline_lf(&line, out) != EOF) {
866-
struct string_list_item *item;
867-
868-
if (line.len != the_hash_algo->hexsz)
869-
die(_("repack: Expecting full hex object ID lines only "
870-
"from pack-objects."));
871-
/*
872-
* avoid putting packs written outside of the repository in the
873-
* list of names
874-
*/
875-
if (local) {
876-
item = string_list_append(names, line.buf);
877-
item->util = populate_pack_exts(line.buf);
878-
}
879-
}
880-
fclose(out);
881-
882-
strbuf_release(&line);
945+
return finish_pack_objects_cmd(&cmd, names, local);
946+
}
883947

884-
return finish_command(&cmd);
948+
static const char *find_pack_prefix(const char *packdir, const char *packtmp)
949+
{
950+
const char *pack_prefix;
951+
if (!skip_prefix(packtmp, packdir, &pack_prefix))
952+
die(_("pack prefix %s does not begin with objdir %s"),
953+
packtmp, packdir);
954+
if (*pack_prefix == '/')
955+
pack_prefix++;
956+
return pack_prefix;
885957
}
886958

887959
int cmd_repack(int argc, const char **argv, const char *prefix)
@@ -891,10 +963,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
891963
struct string_list names = STRING_LIST_INIT_DUP;
892964
struct existing_packs existing = EXISTING_PACKS_INIT;
893965
struct pack_geometry geometry = { 0 };
894-
struct strbuf line = STRBUF_INIT;
895966
struct tempfile *refs_snapshot = NULL;
896967
int i, ext, ret;
897-
FILE *out;
898968
int show_progress;
899969

900970
/* variables to be filled by option parsing */
@@ -907,6 +977,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
907977
int write_midx = 0;
908978
const char *cruft_expiration = NULL;
909979
const char *expire_to = NULL;
980+
const char *filter_to = NULL;
910981

911982
struct option builtin_repack_options[] = {
912983
OPT_BIT('a', NULL, &pack_everything,
@@ -948,6 +1019,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
9481019
N_("limits the maximum number of threads")),
9491020
OPT_STRING(0, "max-pack-size", &po_args.max_pack_size, N_("bytes"),
9501021
N_("maximum size of each packfile")),
1022+
OPT_PARSE_LIST_OBJECTS_FILTER(&po_args.filter_options),
9511023
OPT_BOOL(0, "pack-kept-objects", &pack_kept_objects,
9521024
N_("repack objects in packs marked with .keep")),
9531025
OPT_STRING_LIST(0, "keep-pack", &keep_pack_list, N_("name"),
@@ -958,9 +1030,13 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
9581030
N_("write a multi-pack index of the resulting packs")),
9591031
OPT_STRING(0, "expire-to", &expire_to, N_("dir"),
9601032
N_("pack prefix to store a pack containing pruned objects")),
1033+
OPT_STRING(0, "filter-to", &filter_to, N_("dir"),
1034+
N_("pack prefix to store a pack containing filtered out objects")),
9611035
OPT_END()
9621036
};
9631037

1038+
list_objects_filter_init(&po_args.filter_options);
1039+
9641040
git_config(repack_config, &cruft_po_args);
9651041

9661042
argc = parse_options(argc, argv, prefix, builtin_repack_options,
@@ -1101,6 +1177,12 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
11011177
strvec_push(&cmd.args, "--incremental");
11021178
}
11031179

1180+
if (po_args.filter_options.choice)
1181+
strvec_pushf(&cmd.args, "--filter=%s",
1182+
expand_list_objects_filter_spec(&po_args.filter_options));
1183+
else if (filter_to)
1184+
die(_("option '%s' can only be used along with '%s'"), "--filter-to", "--filter");
1185+
11041186
if (geometry.split_factor)
11051187
cmd.in = -1;
11061188
else
@@ -1124,31 +1206,15 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
11241206
fclose(in);
11251207
}
11261208

1127-
out = xfdopen(cmd.out, "r");
1128-
while (strbuf_getline_lf(&line, out) != EOF) {
1129-
struct string_list_item *item;
1130-
1131-
if (line.len != the_hash_algo->hexsz)
1132-
die(_("repack: Expecting full hex object ID lines only from pack-objects."));
1133-
item = string_list_append(&names, line.buf);
1134-
item->util = populate_pack_exts(item->string);
1135-
}
1136-
strbuf_release(&line);
1137-
fclose(out);
1138-
ret = finish_command(&cmd);
1209+
ret = finish_pack_objects_cmd(&cmd, &names, 1);
11391210
if (ret)
11401211
goto cleanup;
11411212

11421213
if (!names.nr && !po_args.quiet)
11431214
printf_ln(_("Nothing new to pack."));
11441215

11451216
if (pack_everything & PACK_CRUFT) {
1146-
const char *pack_prefix;
1147-
if (!skip_prefix(packtmp, packdir, &pack_prefix))
1148-
die(_("pack prefix %s does not begin with objdir %s"),
1149-
packtmp, packdir);
1150-
if (*pack_prefix == '/')
1151-
pack_prefix++;
1217+
const char *pack_prefix = find_pack_prefix(packdir, packtmp);
11521218

11531219
if (!cruft_po_args.window)
11541220
cruft_po_args.window = po_args.window;
@@ -1203,6 +1269,19 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
12031269
}
12041270
}
12051271

1272+
if (po_args.filter_options.choice) {
1273+
if (!filter_to)
1274+
filter_to = packtmp;
1275+
1276+
ret = write_filtered_pack(&po_args,
1277+
filter_to,
1278+
find_pack_prefix(packdir, packtmp),
1279+
&existing,
1280+
&names);
1281+
if (ret)
1282+
goto cleanup;
1283+
}
1284+
12061285
string_list_sort(&names);
12071286

12081287
close_object_store(the_repository->objects);
@@ -1295,6 +1374,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
12951374
string_list_clear(&names, 1);
12961375
existing_packs_release(&existing);
12971376
free_pack_geometry(&geometry);
1377+
list_objects_filter_release(&po_args.filter_options);
12981378

12991379
return ret;
13001380
}

0 commit comments

Comments
 (0)