Skip to content

Commit 48a9b67

Browse files
chriscoolgitster
authored and committed
repack: add --filter=<filter-spec> option
This new option puts the objects specified by `<filter-spec>` into a separate packfile. This could be useful if, for example, some blobs take up a lot of precious space on fast storage while they are rarely accessed. It could make sense to move them into a separate cheaper, though slower, storage. It's possible to find which new packfile contains the filtered out objects using one of the following: - `git verify-pack -v ...`, - `test-tool find-pack ...`, which a previous commit added, - `--filter-to=<dir>`, which a following commit will add to specify where the pack containing the filtered out objects will be. This feature is implemented by running `git pack-objects` twice in a row. The first command is run with `--filter=<filter-spec>`, using the specified filter. It packs objects while omitting the objects specified by the filter. Then another `git pack-objects` command is launched using `--stdin-packs`. We pass it all the previously existing packs into its stdin, so that it will pack all the objects in the previously existing packs. But we also pass into its stdin, the pack created by the previous `git pack-objects --filter=<filter-spec>` command as well as the kept packs, all prefixed with '^', so that the objects in these packs will be omitted from the resulting pack. The result is that only the objects filtered out by the first `git pack-objects` command are in the pack resulting from the second `git pack-objects` command. As the interactions with kept packs are a bit tricky, a few related tests are added. Helped-by: Taylor Blau <[email protected]> Signed-off-by: John Cai <[email protected]> Signed-off-by: Christian Couder <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 0e4747e commit 48a9b67

File tree

3 files changed

+217
-0
lines changed

3 files changed

+217
-0
lines changed

Documentation/git-repack.txt

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,18 @@ depth is 4095.
143143
a larger and slower repository; see the discussion in
144144
`pack.packSizeLimit`.
145145

146+
--filter=<filter-spec>::
147+
Remove objects matching the filter specification from the
148+
resulting packfile and put them into a separate packfile. Note
149+
that objects used in the working directory are not filtered
150+
out. So for the split to fully work, it's best to perform it
151+
in a bare repo and to use the `-a` and `-d` options along with
152+
this option. Also `--no-write-bitmap-index` (or the
153+
`repack.writebitmaps` config option set to `false`) should be
154+
used, otherwise writing the bitmap index will fail, as it assumes
155+
a single packfile containing all the objects. See
156+
linkgit:git-rev-list[1] for valid `<filter-spec>` forms.
157+
146158
-b::
147159
--write-bitmap-index::
148160
Write a reachability bitmap index as part of the repack. This

builtin/repack.c

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "pack.h"
2222
#include "pack-bitmap.h"
2323
#include "refs.h"
24+
#include "list-objects-filter-options.h"
2425

2526
#define ALL_INTO_ONE 1
2627
#define LOOSEN_UNREACHABLE 2
@@ -56,6 +57,7 @@ struct pack_objects_args {
5657
int no_reuse_object;
5758
int quiet;
5859
int local;
60+
struct list_objects_filter_options filter_options;
5961
};
6062

6163
static int repack_config(const char *var, const char *value,
@@ -836,6 +838,56 @@ static int finish_pack_objects_cmd(struct child_process *cmd,
836838
return finish_command(cmd);
837839
}
838840

841+
/*
 * Write the objects that the first, filtered `pack-objects` run left
 * out into their own pack.
 *
 * A second `pack-objects` child is started with `--stdin-packs` and is
 * fed a list of pack names on its stdin:
 *   - every pack named in 'names', prefixed with '^';
 *   - every pack in existing->non_kept_packs and existing->cruft_packs,
 *     without a prefix;
 *   - every pack in existing->kept_packs, prefixed with '^' unless
 *     'pack_kept_objects' is set.
 * Per the description of this change, packs prefixed with '^' have
 * their objects omitted from the resulting pack.
 *
 * Returns the non-zero result of start_command() if the child could
 * not be started, otherwise whatever finish_pack_objects_cmd() returns.
 * 'names' is handed to finish_pack_objects_cmd() -- presumably so the
 * newly written pack(s) get recorded there; confirm against that helper.
 */
static int write_filtered_pack(const struct pack_objects_args *args,
842+
const char *destination,
843+
const char *pack_prefix,
844+
struct existing_packs *existing,
845+
struct string_list *names)
846+
{
847+
struct child_process cmd = CHILD_PROCESS_INIT;
848+
struct string_list_item *item;
849+
FILE *in;
850+
int ret;
851+
const char *caret;
852+
const char *scratch;
853+
int local = skip_prefix(destination, packdir, &scratch);
854+
855+
prepare_pack_objects(&cmd, args, destination);
856+
857+
strvec_push(&cmd.args, "--stdin-packs");
858+
859+
if (!pack_kept_objects)
860+
strvec_push(&cmd.args, "--honor-pack-keep");
861+
for_each_string_list_item(item, &existing->kept_packs)
862+
strvec_pushf(&cmd.args, "--keep-pack=%s", item->string);
863+
864+
cmd.in = -1;
865+
866+
ret = start_command(&cmd);
867+
if (ret)
868+
return ret;
869+
870+
/*
871+
* Here 'names' contains only the pack(s) that were just
872+
* written, which is exactly the packs we want to keep. Also
873+
* 'existing->kept_packs' already contains the packs in
874+
* 'keep_pack_list'.
875+
*/
876+
in = xfdopen(cmd.in, "w");
877+
for_each_string_list_item(item, names)
878+
fprintf(in, "^%s-%s.pack\n", pack_prefix, item->string);
879+
for_each_string_list_item(item, &existing->non_kept_packs)
880+
fprintf(in, "%s.pack\n", item->string);
881+
for_each_string_list_item(item, &existing->cruft_packs)
882+
fprintf(in, "%s.pack\n", item->string);
883+
/* Kept packs are listed with '^' unless --pack-kept-objects was given. */
caret = pack_kept_objects ? "" : "^";
884+
for_each_string_list_item(item, &existing->kept_packs)
885+
fprintf(in, "%s%s.pack\n", caret, item->string);
886+
fclose(in);
887+
888+
return finish_pack_objects_cmd(&cmd, names, local);
889+
}
890+
839891
static int write_cruft_pack(const struct pack_objects_args *args,
840892
const char *destination,
841893
const char *pack_prefix,
@@ -966,6 +1018,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
9661018
N_("limits the maximum number of threads")),
9671019
OPT_STRING(0, "max-pack-size", &po_args.max_pack_size, N_("bytes"),
9681020
N_("maximum size of each packfile")),
1021+
OPT_PARSE_LIST_OBJECTS_FILTER(&po_args.filter_options),
9691022
OPT_BOOL(0, "pack-kept-objects", &pack_kept_objects,
9701023
N_("repack objects in packs marked with .keep")),
9711024
OPT_STRING_LIST(0, "keep-pack", &keep_pack_list, N_("name"),
@@ -979,6 +1032,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
9791032
OPT_END()
9801033
};
9811034

1035+
list_objects_filter_init(&po_args.filter_options);
1036+
9821037
git_config(repack_config, &cruft_po_args);
9831038

9841039
argc = parse_options(argc, argv, prefix, builtin_repack_options,
@@ -1119,6 +1174,10 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
11191174
strvec_push(&cmd.args, "--incremental");
11201175
}
11211176

1177+
if (po_args.filter_options.choice)
1178+
strvec_pushf(&cmd.args, "--filter=%s",
1179+
expand_list_objects_filter_spec(&po_args.filter_options));
1180+
11221181
if (geometry.split_factor)
11231182
cmd.in = -1;
11241183
else
@@ -1205,6 +1264,16 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
12051264
}
12061265
}
12071266

1267+
if (po_args.filter_options.choice) {
1268+
ret = write_filtered_pack(&po_args,
1269+
packtmp,
1270+
find_pack_prefix(packdir, packtmp),
1271+
&existing,
1272+
&names);
1273+
if (ret)
1274+
goto cleanup;
1275+
}
1276+
12081277
string_list_sort(&names);
12091278

12101279
close_object_store(the_repository->objects);
@@ -1297,6 +1366,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
12971366
string_list_clear(&names, 1);
12981367
existing_packs_release(&existing);
12991368
free_pack_geometry(&geometry);
1369+
list_objects_filter_release(&po_args.filter_options);
13001370

13011371
return ret;
13021372
}

t/t7700-repack.sh

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,141 @@ test_expect_success 'auto-bitmaps do not complain if unavailable' '
327327
test_must_be_empty actual
328328
'
329329

330+
test_expect_success 'repacking with a filter works' '
331+
git -C bare.git repack -a -d &&
332+
test_stdout_line_count = 1 ls bare.git/objects/pack/*.pack &&
333+
git -C bare.git -c repack.writebitmaps=false repack -a -d --filter=blob:none &&
334+
test_stdout_line_count = 2 ls bare.git/objects/pack/*.pack &&
335+
commit_pack=$(test-tool -C bare.git find-pack -c 1 HEAD) &&
336+
blob_pack=$(test-tool -C bare.git find-pack -c 1 HEAD:file1) &&
337+
test "$commit_pack" != "$blob_pack" &&
338+
tree_pack=$(test-tool -C bare.git find-pack -c 1 HEAD^{tree}) &&
339+
test "$tree_pack" = "$commit_pack" &&
340+
blob_pack2=$(test-tool -C bare.git find-pack -c 1 HEAD:file2) &&
341+
test "$blob_pack2" = "$blob_pack"
342+
'
343+
344+
test_expect_success '--filter fails with --write-bitmap-index' '
345+
test_must_fail \
346+
env GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 \
347+
git -C bare.git repack -a -d --write-bitmap-index --filter=blob:none
348+
'
349+
350+
test_expect_success 'repacking with two filters works' '
351+
git init two-filters &&
352+
(
353+
cd two-filters &&
354+
mkdir subdir &&
355+
test_commit foo &&
356+
test_commit subdir_bar subdir/bar &&
357+
test_commit subdir_baz subdir/baz
358+
) &&
359+
git clone --no-local --bare two-filters two-filters.git &&
360+
(
361+
cd two-filters.git &&
362+
test_stdout_line_count = 1 ls objects/pack/*.pack &&
363+
git -c repack.writebitmaps=false repack -a -d \
364+
--filter=blob:none --filter=tree:1 &&
365+
test_stdout_line_count = 2 ls objects/pack/*.pack &&
366+
commit_pack=$(test-tool find-pack -c 1 HEAD) &&
367+
blob_pack=$(test-tool find-pack -c 1 HEAD:foo.t) &&
368+
root_tree_pack=$(test-tool find-pack -c 1 HEAD^{tree}) &&
369+
subdir_tree_hash=$(git ls-tree --object-only HEAD -- subdir) &&
370+
subdir_tree_pack=$(test-tool find-pack -c 1 "$subdir_tree_hash") &&
371+
372+
# Root tree and subdir tree are not in the same packfiles
373+
test "$commit_pack" != "$blob_pack" &&
374+
test "$commit_pack" = "$root_tree_pack" &&
375+
test "$blob_pack" = "$subdir_tree_pack"
376+
)
377+
'
378+
379+
prepare_for_keep_packs () {
380+
git init keep-packs &&
381+
(
382+
cd keep-packs &&
383+
test_commit foo &&
384+
test_commit bar
385+
) &&
386+
git clone --no-local --bare keep-packs keep-packs.git &&
387+
(
388+
cd keep-packs.git &&
389+
390+
# Create two packs
391+
# The first pack will contain all of the objects except one blob
392+
git rev-list --objects --all >objs &&
393+
grep -v "bar.t" objs | git pack-objects pack &&
394+
# The second pack will contain the excluded object and be kept
395+
packid=$(grep "bar.t" objs | git pack-objects pack) &&
396+
>pack-$packid.keep &&
397+
398+
# Replace the existing pack with the 2 new ones
399+
rm -f objects/pack/pack* &&
400+
mv pack-* objects/pack/
401+
)
402+
}
403+
404+
test_expect_success '--filter works with .keep packs' '
405+
prepare_for_keep_packs &&
406+
(
407+
cd keep-packs.git &&
408+
409+
foo_pack=$(test-tool find-pack -c 1 HEAD:foo.t) &&
410+
bar_pack=$(test-tool find-pack -c 1 HEAD:bar.t) &&
411+
head_pack=$(test-tool find-pack -c 1 HEAD) &&
412+
413+
test "$foo_pack" != "$bar_pack" &&
414+
test "$foo_pack" = "$head_pack" &&
415+
416+
git -c repack.writebitmaps=false repack -a -d --filter=blob:none &&
417+
418+
foo_pack_1=$(test-tool find-pack -c 1 HEAD:foo.t) &&
419+
bar_pack_1=$(test-tool find-pack -c 1 HEAD:bar.t) &&
420+
head_pack_1=$(test-tool find-pack -c 1 HEAD) &&
421+
422+
# Object bar is still only in the old .keep pack
423+
test "$foo_pack_1" != "$foo_pack" &&
424+
test "$bar_pack_1" = "$bar_pack" &&
425+
test "$head_pack_1" != "$head_pack" &&
426+
427+
test "$foo_pack_1" != "$bar_pack_1" &&
428+
test "$foo_pack_1" != "$head_pack_1" &&
429+
test "$bar_pack_1" != "$head_pack_1"
430+
)
431+
'
432+
433+
test_expect_success '--filter works with --pack-kept-objects and .keep packs' '
434+
rm -rf keep-packs keep-packs.git &&
435+
prepare_for_keep_packs &&
436+
(
437+
cd keep-packs.git &&
438+
439+
foo_pack=$(test-tool find-pack -c 1 HEAD:foo.t) &&
440+
bar_pack=$(test-tool find-pack -c 1 HEAD:bar.t) &&
441+
head_pack=$(test-tool find-pack -c 1 HEAD) &&
442+
443+
test "$foo_pack" != "$bar_pack" &&
444+
test "$foo_pack" = "$head_pack" &&
445+
446+
git -c repack.writebitmaps=false repack -a -d --filter=blob:none \
447+
--pack-kept-objects &&
448+
449+
foo_pack_1=$(test-tool find-pack -c 1 HEAD:foo.t) &&
450+
test-tool find-pack -c 2 HEAD:bar.t >bar_pack_1 &&
451+
head_pack_1=$(test-tool find-pack -c 1 HEAD) &&
452+
453+
test "$foo_pack_1" != "$foo_pack" &&
454+
test "$foo_pack_1" != "$bar_pack" &&
455+
test "$head_pack_1" != "$head_pack" &&
456+
457+
# Object bar is in both the old .keep pack and the new
458+
# pack that contained the filtered out objects
459+
grep "$bar_pack" bar_pack_1 &&
460+
grep "$foo_pack_1" bar_pack_1 &&
461+
test "$foo_pack_1" != "$head_pack_1"
462+
)
463+
'
464+
330465
objdir=.git/objects
331466
midx=$objdir/pack/multi-pack-index
332467

0 commit comments

Comments
 (0)