Skip to content

Commit 070087a

Browse files
committed
Merge branch 'tb/midx-avoid-cruft-packs' into jch
"pack-objects" has been taught to avoid pointing into objects in cruft packs from midx. Comments? * tb/midx-avoid-cruft-packs: repack: exclude cruft pack(s) from the MIDX where possible pack-objects: introduce '--stdin-packs=follow' pack-objects: swap 'show_{object,commit}_pack_hint' pack-objects: fix typo in 'show_object_pack_hint()' pack-objects: perform name-hash traversal for unpacked objects pack-objects: declare 'rev_info' for '--stdin-packs' earlier pack-objects: factor out handling '--stdin-packs' pack-objects: limit scope in 'add_object_entry_from_pack()' pack-objects: use standard option incompatibility functions
2 parents 13f8dc3 + 2973347 commit 070087a

File tree

6 files changed

+454
-88
lines changed

6 files changed

+454
-88
lines changed

Documentation/config/repack.adoc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,10 @@ repack.cruftThreads::
3939
a cruft pack and the respective parameters are not given over
4040
the command line. See similarly named `pack.*` configuration
4141
variables for defaults and meaning.
42+
43+
repack.midxMustContainCruft::
44+
When set to true, linkgit:git-repack[1] will unconditionally include
45+
cruft pack(s), if any, in the multi-pack index when invoked with
46+
`--write-midx`. When false, cruft packs are only included in the MIDX
47+
when necessary (e.g., because they might be required to form a
48+
reachability closure with MIDX bitmaps). Defaults to true.

Documentation/git-pack-objects.adoc

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,13 +87,21 @@ base-name::
8787
reference was included in the resulting packfile. This
8888
can be useful to send new tags to native Git clients.
8989

90-
--stdin-packs::
90+
--stdin-packs[=<mode>]::
9191
Read the basenames of packfiles (e.g., `pack-1234abcd.pack`)
9292
from the standard input, instead of object names or revision
9393
arguments. The resulting pack contains all objects listed in the
9494
included packs (those not beginning with `^`), excluding any
9595
objects listed in the excluded packs (beginning with `^`).
9696
+
97+
When `mode` is "follow", objects from packs not listed on stdin receive
98+
special treatment. Objects within unlisted packs will be included if
99+
those objects are (1) reachable from the included packs, and (2) not
100+
found in any excluded packs. This mode is useful, for example, to
101+
resurrect once-unreachable objects found in cruft packs to generate
102+
packs which are closed under reachability up to the boundary set by the
103+
excluded packs.
104+
+
97105
Incompatible with `--revs`, or options that imply `--revs` (such as
98106
`--all`), with the exception of `--unpacked`, which is compatible.
99107

builtin/pack-objects.c

Lines changed: 120 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,12 @@ static struct oidmap configured_exclusions;
283283
static struct oidset excluded_by_config;
284284
static int name_hash_version = -1;
285285

286+
enum stdin_packs_mode {
287+
STDIN_PACKS_MODE_NONE,
288+
STDIN_PACKS_MODE_STANDARD,
289+
STDIN_PACKS_MODE_FOLLOW,
290+
};
291+
286292
/**
287293
* Check whether the name_hash_version chosen by user input is appropriate,
288294
* and also validate whether it is compatible with other features.
@@ -3721,14 +3727,14 @@ static int add_object_entry_from_pack(const struct object_id *oid,
37213727
return 0;
37223728

37233729
if (p) {
3724-
struct rev_info *revs = _data;
37253730
struct object_info oi = OBJECT_INFO_INIT;
3726-
37273731
oi.typep = &type;
3732+
37283733
if (packed_object_info(the_repository, p, ofs, &oi) < 0) {
37293734
die(_("could not get type of object %s in pack %s"),
37303735
oid_to_hex(oid), p->pack_name);
37313736
} else if (type == OBJ_COMMIT) {
3737+
struct rev_info *revs = _data;
37323738
/*
37333739
* commits in included packs are used as starting points for the
37343740
* subsequent revision walk
@@ -3744,32 +3750,45 @@ static int add_object_entry_from_pack(const struct object_id *oid,
37443750
return 0;
37453751
}
37463752

3747-
static void show_commit_pack_hint(struct commit *commit UNUSED,
3748-
void *data UNUSED)
3753+
static void show_object_pack_hint(struct object *object, const char *name,
3754+
void *data)
37493755
{
3750-
/* nothing to do; commits don't have a namehash */
3756+
enum stdin_packs_mode mode = *(enum stdin_packs_mode *)data;
3757+
if (mode == STDIN_PACKS_MODE_FOLLOW) {
3758+
add_object_entry(&object->oid, object->type, name, 0);
3759+
} else {
3760+
struct object_entry *oe = packlist_find(&to_pack, &object->oid);
3761+
if (!oe)
3762+
return;
3763+
3764+
/*
3765+
* Our 'to_pack' list was constructed by iterating all
3766+
* objects packed in included packs, and so doesn't have
3767+
* a non-zero hash field that you would typically pick
3768+
* up during a reachability traversal.
3769+
*
3770+
* Make a best-effort attempt to fill in the ->hash and
3771+
* ->no_try_delta fields here in order to perhaps
3772+
* improve the delta selection process.
3773+
*/
3774+
oe->hash = pack_name_hash_fn(name);
3775+
oe->no_try_delta = name && no_try_delta(name);
3776+
3777+
stdin_packs_hints_nr++;
3778+
}
37513779
}
37523780

3753-
static void show_object_pack_hint(struct object *object, const char *name,
3754-
void *data UNUSED)
3781+
static void show_commit_pack_hint(struct commit *commit, void *data)
37553782
{
3756-
struct object_entry *oe = packlist_find(&to_pack, &object->oid);
3757-
if (!oe)
3783+
enum stdin_packs_mode mode = *(enum stdin_packs_mode *)data;
3784+
3785+
if (mode == STDIN_PACKS_MODE_FOLLOW) {
3786+
show_object_pack_hint((struct object *)commit, "", data);
37583787
return;
3788+
}
37593789

3760-
/*
3761-
* Our 'to_pack' list was constructed by iterating all objects packed in
3762-
* included packs, and so doesn't have a non-zero hash field that you
3763-
* would typically pick up during a reachability traversal.
3764-
*
3765-
* Make a best-effort attempt to fill in the ->hash and ->no_try_delta
3766-
* here using a now in order to perhaps improve the delta selection
3767-
* process.
3768-
*/
3769-
oe->hash = pack_name_hash_fn(name);
3770-
oe->no_try_delta = name && no_try_delta(name);
3790+
/* nothing to do; commits don't have a namehash */
37713791

3772-
stdin_packs_hints_nr++;
37733792
}
37743793

37753794
static int pack_mtime_cmp(const void *_a, const void *_b)
@@ -3789,32 +3808,14 @@ static int pack_mtime_cmp(const void *_a, const void *_b)
37893808
return 0;
37903809
}
37913810

3792-
static void read_packs_list_from_stdin(void)
3811+
static void read_packs_list_from_stdin(struct rev_info *revs)
37933812
{
37943813
struct strbuf buf = STRBUF_INIT;
37953814
struct string_list include_packs = STRING_LIST_INIT_DUP;
37963815
struct string_list exclude_packs = STRING_LIST_INIT_DUP;
37973816
struct string_list_item *item = NULL;
37983817

37993818
struct packed_git *p;
3800-
struct rev_info revs;
3801-
3802-
repo_init_revisions(the_repository, &revs, NULL);
3803-
/*
3804-
* Use a revision walk to fill in the namehash of objects in the include
3805-
* packs. To save time, we'll avoid traversing through objects that are
3806-
* in excluded packs.
3807-
*
3808-
* That may cause us to avoid populating all of the namehash fields of
3809-
* all included objects, but our goal is best-effort, since this is only
3810-
* an optimization during delta selection.
3811-
*/
3812-
revs.no_kept_objects = 1;
3813-
revs.keep_pack_cache_flags |= IN_CORE_KEEP_PACKS;
3814-
revs.blob_objects = 1;
3815-
revs.tree_objects = 1;
3816-
revs.tag_objects = 1;
3817-
revs.ignore_missing_links = 1;
38183819

38193820
while (strbuf_getline(&buf, stdin) != EOF) {
38203821
if (!buf.len)
@@ -3884,25 +3885,55 @@ static void read_packs_list_from_stdin(void)
38843885
struct packed_git *p = item->util;
38853886
for_each_object_in_pack(p,
38863887
add_object_entry_from_pack,
3887-
&revs,
3888+
revs,
38883889
FOR_EACH_OBJECT_PACK_ORDER);
38893890
}
38903891

3892+
strbuf_release(&buf);
3893+
string_list_clear(&include_packs, 0);
3894+
string_list_clear(&exclude_packs, 0);
3895+
}
3896+
3897+
static void add_unreachable_loose_objects(struct rev_info *revs);
3898+
3899+
static void read_stdin_packs(enum stdin_packs_mode mode, int rev_list_unpacked)
3900+
{
3901+
struct rev_info revs;
3902+
3903+
repo_init_revisions(the_repository, &revs, NULL);
3904+
/*
3905+
* Use a revision walk to fill in the namehash of objects in the include
3906+
* packs. To save time, we'll avoid traversing through objects that are
3907+
* in excluded packs.
3908+
*
3909+
* That may cause us to avoid populating all of the namehash fields of
3910+
* all included objects, but our goal is best-effort, since this is only
3911+
* an optimization during delta selection.
3912+
*/
3913+
revs.no_kept_objects = 1;
3914+
revs.keep_pack_cache_flags |= IN_CORE_KEEP_PACKS;
3915+
revs.blob_objects = 1;
3916+
revs.tree_objects = 1;
3917+
revs.tag_objects = 1;
3918+
revs.ignore_missing_links = 1;
3919+
3920+
/* avoids adding objects in excluded packs */
3921+
ignore_packed_keep_in_core = 1;
3922+
read_packs_list_from_stdin(&revs);
3923+
if (rev_list_unpacked)
3924+
add_unreachable_loose_objects(&revs);
3925+
38913926
if (prepare_revision_walk(&revs))
38923927
die(_("revision walk setup failed"));
38933928
traverse_commit_list(&revs,
38943929
show_commit_pack_hint,
38953930
show_object_pack_hint,
3896-
NULL);
3931+
&mode);
38973932

38983933
trace2_data_intmax("pack-objects", the_repository, "stdin_packs_found",
38993934
stdin_packs_found_nr);
39003935
trace2_data_intmax("pack-objects", the_repository, "stdin_packs_hints",
39013936
stdin_packs_hints_nr);
3902-
3903-
strbuf_release(&buf);
3904-
string_list_clear(&include_packs, 0);
3905-
string_list_clear(&exclude_packs, 0);
39063937
}
39073938

39083939
static void add_cruft_object_entry(const struct object_id *oid, enum object_type type,
@@ -4000,7 +4031,6 @@ static void mark_pack_kept_in_core(struct string_list *packs, unsigned keep)
40004031
}
40014032
}
40024033

4003-
static void add_unreachable_loose_objects(void);
40044034
static void add_objects_in_unpacked_packs(void);
40054035

40064036
static void enumerate_cruft_objects(void)
@@ -4010,7 +4040,7 @@ static void enumerate_cruft_objects(void)
40104040
_("Enumerating cruft objects"), 0);
40114041

40124042
add_objects_in_unpacked_packs();
4013-
add_unreachable_loose_objects();
4043+
add_unreachable_loose_objects(NULL);
40144044

40154045
stop_progress(&progress_state);
40164046
}
@@ -4288,8 +4318,9 @@ static void add_objects_in_unpacked_packs(void)
42884318
}
42894319

42904320
static int add_loose_object(const struct object_id *oid, const char *path,
4291-
void *data UNUSED)
4321+
void *data)
42924322
{
4323+
struct rev_info *revs = data;
42934324
enum object_type type = oid_object_info(the_repository, oid, NULL);
42944325

42954326
if (type < 0) {
@@ -4310,6 +4341,10 @@ static int add_loose_object(const struct object_id *oid, const char *path,
43104341
} else {
43114342
add_object_entry(oid, type, "", 0);
43124343
}
4344+
4345+
if (revs && type == OBJ_COMMIT)
4346+
add_pending_oid(revs, NULL, oid, 0);
4347+
43134348
return 0;
43144349
}
43154350

@@ -4318,11 +4353,10 @@ static int add_loose_object(const struct object_id *oid, const char *path,
43184353
* add_object_entry will weed out duplicates, so we just add every
43194354
* loose object we find.
43204355
*/
4321-
static void add_unreachable_loose_objects(void)
4356+
static void add_unreachable_loose_objects(struct rev_info *revs)
43224357
{
43234358
for_each_loose_file_in_objdir(repo_get_object_directory(the_repository),
4324-
add_loose_object,
4325-
NULL, NULL, NULL);
4359+
add_loose_object, NULL, NULL, revs);
43264360
}
43274361

43284362
static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid)
@@ -4665,7 +4699,7 @@ static void get_object_list(struct rev_info *revs, int ac, const char **av)
46654699
if (keep_unreachable)
46664700
add_objects_in_unpacked_packs();
46674701
if (pack_loose_unreachable)
4668-
add_unreachable_loose_objects();
4702+
add_unreachable_loose_objects(NULL);
46694703
if (unpack_unreachable)
46704704
loosen_unused_packed_objects();
46714705

@@ -4772,6 +4806,23 @@ static int is_not_in_promisor_pack(struct commit *commit, void *data) {
47724806
return is_not_in_promisor_pack_obj((struct object *) commit, data);
47734807
}
47744808

4809+
static int parse_stdin_packs_mode(const struct option *opt, const char *arg,
4810+
int unset)
4811+
{
4812+
enum stdin_packs_mode *mode = opt->value;
4813+
4814+
if (unset)
4815+
*mode = STDIN_PACKS_MODE_NONE;
4816+
else if (!arg || !*arg)
4817+
*mode = STDIN_PACKS_MODE_STANDARD;
4818+
else if (!strcmp(arg, "follow"))
4819+
*mode = STDIN_PACKS_MODE_FOLLOW;
4820+
else
4821+
die(_("invalid value for '%s': '%s'"), opt->long_name, arg);
4822+
4823+
return 0;
4824+
}
4825+
47754826
int cmd_pack_objects(int argc,
47764827
const char **argv,
47774828
const char *prefix,
@@ -4782,7 +4833,7 @@ int cmd_pack_objects(int argc,
47824833
struct strvec rp = STRVEC_INIT;
47834834
int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0;
47844835
int rev_list_index = 0;
4785-
int stdin_packs = 0;
4836+
enum stdin_packs_mode stdin_packs = STDIN_PACKS_MODE_NONE;
47864837
struct string_list keep_pack_list = STRING_LIST_INIT_NODUP;
47874838
struct list_objects_filter_options filter_options =
47884839
LIST_OBJECTS_FILTER_INIT;
@@ -4837,6 +4888,9 @@ int cmd_pack_objects(int argc,
48374888
OPT_SET_INT_F(0, "indexed-objects", &rev_list_index,
48384889
N_("include objects referred to by the index"),
48394890
1, PARSE_OPT_NONEG),
4891+
OPT_CALLBACK_F(0, "stdin-packs", &stdin_packs, N_("mode"),
4892+
N_("read packs from stdin"),
4893+
PARSE_OPT_OPTARG, parse_stdin_packs_mode),
48404894
OPT_BOOL(0, "stdin-packs", &stdin_packs,
48414895
N_("read packs from stdin")),
48424896
OPT_BOOL(0, "stdout", &pack_to_stdout,
@@ -4997,9 +5051,10 @@ int cmd_pack_objects(int argc,
49975051
strvec_push(&rp, "--unpacked");
49985052
}
49995053

5000-
if (exclude_promisor_objects && exclude_promisor_objects_best_effort)
5001-
die(_("options '%s' and '%s' cannot be used together"),
5002-
"--exclude-promisor-objects", "--exclude-promisor-objects-best-effort");
5054+
die_for_incompatible_opt2(exclude_promisor_objects,
5055+
"--exclude-promisor-objects",
5056+
exclude_promisor_objects_best_effort,
5057+
"--exclude-promisor-objects-best-effort");
50035058
if (exclude_promisor_objects) {
50045059
use_internal_rev_list = 1;
50055060
fetch_if_missing = 0;
@@ -5037,22 +5092,23 @@ int cmd_pack_objects(int argc,
50375092
if (!pack_to_stdout && thin)
50385093
die(_("--thin cannot be used to build an indexable pack"));
50395094

5040-
if (keep_unreachable && unpack_unreachable)
5041-
die(_("options '%s' and '%s' cannot be used together"), "--keep-unreachable", "--unpack-unreachable");
5095+
die_for_incompatible_opt2(keep_unreachable, "--keep-unreachable",
5096+
unpack_unreachable, "--unpack-unreachable");
50425097
if (!rev_list_all || !rev_list_reflog || !rev_list_index)
50435098
unpack_unreachable_expiration = 0;
50445099

5045-
if (stdin_packs && filter_options.choice)
5046-
die(_("cannot use --filter with --stdin-packs"));
5100+
die_for_incompatible_opt2(stdin_packs, "--stdin-packs",
5101+
filter_options.choice, "--filter");
5102+
50475103

50485104
if (stdin_packs && use_internal_rev_list)
50495105
die(_("cannot use internal rev list with --stdin-packs"));
50505106

50515107
if (cruft) {
50525108
if (use_internal_rev_list)
50535109
die(_("cannot use internal rev list with --cruft"));
5054-
if (stdin_packs)
5055-
die(_("cannot use --stdin-packs with --cruft"));
5110+
die_for_incompatible_opt2(stdin_packs, "--stdin-packs",
5111+
cruft, "--cruft");
50565112
}
50575113

50585114
/*
@@ -5120,11 +5176,7 @@ int cmd_pack_objects(int argc,
51205176
progress_state = start_progress(the_repository,
51215177
_("Enumerating objects"), 0);
51225178
if (stdin_packs) {
5123-
/* avoids adding objects in excluded packs */
5124-
ignore_packed_keep_in_core = 1;
5125-
read_packs_list_from_stdin();
5126-
if (rev_list_unpacked)
5127-
add_unreachable_loose_objects();
5179+
read_stdin_packs(stdin_packs, rev_list_unpacked);
51285180
} else if (cruft) {
51295181
read_cruft_objects();
51305182
} else if (!use_internal_rev_list) {

0 commit comments

Comments
 (0)