Skip to content

Commit efb8c31

Browse files
committed
Merge branch 'tb/midx-avoid-cruft-packs' into jch
"pack-objects" has been taught to avoid pointing into objects in cruft packs from midx. Ready? * tb/midx-avoid-cruft-packs: repack: exclude cruft pack(s) from the MIDX where possible pack-objects: introduce '--stdin-packs=follow' pack-objects: swap 'show_{object,commit}_pack_hint' pack-objects: fix typo in 'show_object_pack_hint()' pack-objects: perform name-hash traversal for unpacked objects pack-objects: declare 'rev_info' for '--stdin-packs' earlier pack-objects: factor out handling '--stdin-packs' pack-objects: limit scope in 'add_object_entry_from_pack()' pack-objects: use standard option incompatibility functions
2 parents 9825e05 + 5c16683 commit efb8c31

File tree

6 files changed

+453
-87
lines changed

6 files changed

+453
-87
lines changed

Documentation/config/repack.adoc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,10 @@ repack.cruftThreads::
3939
a cruft pack and the respective parameters are not given over
4040
the command line. See similarly named `pack.*` configuration
4141
variables for defaults and meaning.
42+
43+
repack.midxMustContainCruft::
44+
When set to true, linkgit:git-repack[1] will unconditionally include
45+
cruft pack(s), if any, in the multi-pack index when invoked with
46+
`--write-midx`. When false, cruft packs are only included in the MIDX
47+
when necessary (e.g., because they might be required to form a
48+
reachability closure with MIDX bitmaps). Defaults to true.

Documentation/git-pack-objects.adoc

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,13 +87,21 @@ base-name::
8787
reference was included in the resulting packfile. This
8888
can be useful to send new tags to native Git clients.
8989

90-
--stdin-packs::
90+
--stdin-packs[=<mode>]::
9191
Read the basenames of packfiles (e.g., `pack-1234abcd.pack`)
9292
from the standard input, instead of object names or revision
9393
arguments. The resulting pack contains all objects listed in the
9494
included packs (those not beginning with `^`), excluding any
9595
objects listed in the excluded packs (beginning with `^`).
9696
+
97+
When `mode` is "follow", objects from packs not listed on stdin receive
98+
special treatment. Objects within unlisted packs will be included if
99+
those objects are (1) reachable from the included packs, and (2) not
100+
found in any excluded packs. This mode is useful, for example, to
101+
resurrect once-unreachable objects found in cruft packs to generate
102+
packs which are closed under reachability up to the boundary set by the
103+
excluded packs.
104+
+
97105
Incompatible with `--revs`, or options that imply `--revs` (such as
98106
`--all`), with the exception of `--unpacked`, which is compatible.
99107

builtin/pack-objects.c

Lines changed: 119 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,12 @@ static struct oidmap configured_exclusions;
284284
static struct oidset excluded_by_config;
285285
static int name_hash_version = -1;
286286

287+
enum stdin_packs_mode {
288+
STDIN_PACKS_MODE_NONE,
289+
STDIN_PACKS_MODE_STANDARD,
290+
STDIN_PACKS_MODE_FOLLOW,
291+
};
292+
287293
/**
288294
* Check whether the name_hash_version chosen by user input is appropriate,
289295
* and also validate whether it is compatible with other features.
@@ -3725,14 +3731,14 @@ static int add_object_entry_from_pack(const struct object_id *oid,
37253731
return 0;
37263732

37273733
if (p) {
3728-
struct rev_info *revs = _data;
37293734
struct object_info oi = OBJECT_INFO_INIT;
37303735

37313736
oi.typep = &type;
37323737
if (packed_object_info(the_repository, p, ofs, &oi) < 0) {
37333738
die(_("could not get type of object %s in pack %s"),
37343739
oid_to_hex(oid), p->pack_name);
37353740
} else if (type == OBJ_COMMIT) {
3741+
struct rev_info *revs = _data;
37363742
/*
37373743
* commits in included packs are used as starting points for the
37383744
* subsequent revision walk
@@ -3748,32 +3754,45 @@ static int add_object_entry_from_pack(const struct object_id *oid,
37483754
return 0;
37493755
}
37503756

3751-
static void show_commit_pack_hint(struct commit *commit UNUSED,
3752-
void *data UNUSED)
3757+
static void show_object_pack_hint(struct object *object, const char *name,
3758+
void *data)
37533759
{
3754-
/* nothing to do; commits don't have a namehash */
3760+
enum stdin_packs_mode mode = *(enum stdin_packs_mode *)data;
3761+
if (mode == STDIN_PACKS_MODE_FOLLOW) {
3762+
add_object_entry(&object->oid, object->type, name, 0);
3763+
} else {
3764+
struct object_entry *oe = packlist_find(&to_pack, &object->oid);
3765+
if (!oe)
3766+
return;
3767+
3768+
/*
3769+
* Our 'to_pack' list was constructed by iterating all
3770+
* objects packed in included packs, and so doesn't have
3771+
* a non-zero hash field that you would typically pick
3772+
* up during a reachability traversal.
3773+
*
3774+
* Make a best-effort attempt to fill in the ->hash and
3775+
* ->no_try_delta fields here in order to perhaps
3776+
* improve the delta selection process.
3777+
*/
3778+
oe->hash = pack_name_hash_fn(name);
3779+
oe->no_try_delta = name && no_try_delta(name);
3780+
3781+
stdin_packs_hints_nr++;
3782+
}
37553783
}
37563784

3757-
static void show_object_pack_hint(struct object *object, const char *name,
3758-
void *data UNUSED)
3785+
static void show_commit_pack_hint(struct commit *commit, void *data)
37593786
{
3760-
struct object_entry *oe = packlist_find(&to_pack, &object->oid);
3761-
if (!oe)
3787+
enum stdin_packs_mode mode = *(enum stdin_packs_mode *)data;
3788+
3789+
if (mode == STDIN_PACKS_MODE_FOLLOW) {
3790+
show_object_pack_hint((struct object *)commit, "", data);
37623791
return;
3792+
}
37633793

3764-
/*
3765-
* Our 'to_pack' list was constructed by iterating all objects packed in
3766-
* included packs, and so doesn't have a non-zero hash field that you
3767-
* would typically pick up during a reachability traversal.
3768-
*
3769-
* Make a best-effort attempt to fill in the ->hash and ->no_try_delta
3770-
* here using a now in order to perhaps improve the delta selection
3771-
* process.
3772-
*/
3773-
oe->hash = pack_name_hash_fn(name);
3774-
oe->no_try_delta = name && no_try_delta(name);
3794+
/* nothing to do; commits don't have a namehash */
37753795

3776-
stdin_packs_hints_nr++;
37773796
}
37783797

37793798
static int pack_mtime_cmp(const void *_a, const void *_b)
@@ -3793,32 +3812,14 @@ static int pack_mtime_cmp(const void *_a, const void *_b)
37933812
return 0;
37943813
}
37953814

3796-
static void read_packs_list_from_stdin(void)
3815+
static void read_packs_list_from_stdin(struct rev_info *revs)
37973816
{
37983817
struct strbuf buf = STRBUF_INIT;
37993818
struct string_list include_packs = STRING_LIST_INIT_DUP;
38003819
struct string_list exclude_packs = STRING_LIST_INIT_DUP;
38013820
struct string_list_item *item = NULL;
38023821

38033822
struct packed_git *p;
3804-
struct rev_info revs;
3805-
3806-
repo_init_revisions(the_repository, &revs, NULL);
3807-
/*
3808-
* Use a revision walk to fill in the namehash of objects in the include
3809-
* packs. To save time, we'll avoid traversing through objects that are
3810-
* in excluded packs.
3811-
*
3812-
* That may cause us to avoid populating all of the namehash fields of
3813-
* all included objects, but our goal is best-effort, since this is only
3814-
* an optimization during delta selection.
3815-
*/
3816-
revs.no_kept_objects = 1;
3817-
revs.keep_pack_cache_flags |= IN_CORE_KEEP_PACKS;
3818-
revs.blob_objects = 1;
3819-
revs.tree_objects = 1;
3820-
revs.tag_objects = 1;
3821-
revs.ignore_missing_links = 1;
38223823

38233824
while (strbuf_getline(&buf, stdin) != EOF) {
38243825
if (!buf.len)
@@ -3888,25 +3889,55 @@ static void read_packs_list_from_stdin(void)
38883889
struct packed_git *p = item->util;
38893890
for_each_object_in_pack(p,
38903891
add_object_entry_from_pack,
3891-
&revs,
3892+
revs,
38923893
FOR_EACH_OBJECT_PACK_ORDER);
38933894
}
38943895

3896+
strbuf_release(&buf);
3897+
string_list_clear(&include_packs, 0);
3898+
string_list_clear(&exclude_packs, 0);
3899+
}
3900+
3901+
static void add_unreachable_loose_objects(struct rev_info *revs);
3902+
3903+
static void read_stdin_packs(enum stdin_packs_mode mode, int rev_list_unpacked)
3904+
{
3905+
struct rev_info revs;
3906+
3907+
repo_init_revisions(the_repository, &revs, NULL);
3908+
/*
3909+
* Use a revision walk to fill in the namehash of objects in the include
3910+
* packs. To save time, we'll avoid traversing through objects that are
3911+
* in excluded packs.
3912+
*
3913+
* That may cause us to avoid populating all of the namehash fields of
3914+
* all included objects, but our goal is best-effort, since this is only
3915+
* an optimization during delta selection.
3916+
*/
3917+
revs.no_kept_objects = 1;
3918+
revs.keep_pack_cache_flags |= IN_CORE_KEEP_PACKS;
3919+
revs.blob_objects = 1;
3920+
revs.tree_objects = 1;
3921+
revs.tag_objects = 1;
3922+
revs.ignore_missing_links = 1;
3923+
3924+
/* avoids adding objects in excluded packs */
3925+
ignore_packed_keep_in_core = 1;
3926+
read_packs_list_from_stdin(&revs);
3927+
if (rev_list_unpacked)
3928+
add_unreachable_loose_objects(&revs);
3929+
38953930
if (prepare_revision_walk(&revs))
38963931
die(_("revision walk setup failed"));
38973932
traverse_commit_list(&revs,
38983933
show_commit_pack_hint,
38993934
show_object_pack_hint,
3900-
NULL);
3935+
&mode);
39013936

39023937
trace2_data_intmax("pack-objects", the_repository, "stdin_packs_found",
39033938
stdin_packs_found_nr);
39043939
trace2_data_intmax("pack-objects", the_repository, "stdin_packs_hints",
39053940
stdin_packs_hints_nr);
3906-
3907-
strbuf_release(&buf);
3908-
string_list_clear(&include_packs, 0);
3909-
string_list_clear(&exclude_packs, 0);
39103941
}
39113942

39123943
static void add_cruft_object_entry(const struct object_id *oid, enum object_type type,
@@ -4004,7 +4035,6 @@ static void mark_pack_kept_in_core(struct string_list *packs, unsigned keep)
40044035
}
40054036
}
40064037

4007-
static void add_unreachable_loose_objects(void);
40084038
static void add_objects_in_unpacked_packs(void);
40094039

40104040
static void enumerate_cruft_objects(void)
@@ -4014,7 +4044,7 @@ static void enumerate_cruft_objects(void)
40144044
_("Enumerating cruft objects"), 0);
40154045

40164046
add_objects_in_unpacked_packs();
4017-
add_unreachable_loose_objects();
4047+
add_unreachable_loose_objects(NULL);
40184048

40194049
stop_progress(&progress_state);
40204050
}
@@ -4292,8 +4322,9 @@ static void add_objects_in_unpacked_packs(void)
42924322
}
42934323

42944324
static int add_loose_object(const struct object_id *oid, const char *path,
4295-
void *data UNUSED)
4325+
void *data)
42964326
{
4327+
struct rev_info *revs = data;
42974328
enum object_type type = oid_object_info(the_repository, oid, NULL);
42984329

42994330
if (type < 0) {
@@ -4314,6 +4345,10 @@ static int add_loose_object(const struct object_id *oid, const char *path,
43144345
} else {
43154346
add_object_entry(oid, type, "", 0);
43164347
}
4348+
4349+
if (revs && type == OBJ_COMMIT)
4350+
add_pending_oid(revs, NULL, oid, 0);
4351+
43174352
return 0;
43184353
}
43194354

@@ -4322,11 +4357,10 @@ static int add_loose_object(const struct object_id *oid, const char *path,
43224357
* add_object_entry will weed out duplicates, so we just add every
43234358
* loose object we find.
43244359
*/
4325-
static void add_unreachable_loose_objects(void)
4360+
static void add_unreachable_loose_objects(struct rev_info *revs)
43264361
{
43274362
for_each_loose_file_in_objdir(repo_get_object_directory(the_repository),
4328-
add_loose_object,
4329-
NULL, NULL, NULL);
4363+
add_loose_object, NULL, NULL, revs);
43304364
}
43314365

43324366
static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid)
@@ -4673,7 +4707,7 @@ static void get_object_list(struct rev_info *revs, int ac, const char **av)
46734707
if (keep_unreachable)
46744708
add_objects_in_unpacked_packs();
46754709
if (pack_loose_unreachable)
4676-
add_unreachable_loose_objects();
4710+
add_unreachable_loose_objects(NULL);
46774711
if (unpack_unreachable)
46784712
loosen_unused_packed_objects();
46794713

@@ -4780,6 +4814,23 @@ static int is_not_in_promisor_pack(struct commit *commit, void *data) {
47804814
return is_not_in_promisor_pack_obj((struct object *) commit, data);
47814815
}
47824816

4817+
static int parse_stdin_packs_mode(const struct option *opt, const char *arg,
4818+
int unset)
4819+
{
4820+
enum stdin_packs_mode *mode = opt->value;
4821+
4822+
if (unset)
4823+
*mode = STDIN_PACKS_MODE_NONE;
4824+
else if (!arg || !*arg)
4825+
*mode = STDIN_PACKS_MODE_STANDARD;
4826+
else if (!strcmp(arg, "follow"))
4827+
*mode = STDIN_PACKS_MODE_FOLLOW;
4828+
else
4829+
die(_("invalid value for '%s': '%s'"), opt->long_name, arg);
4830+
4831+
return 0;
4832+
}
4833+
47834834
int cmd_pack_objects(int argc,
47844835
const char **argv,
47854836
const char *prefix,
@@ -4790,7 +4841,7 @@ int cmd_pack_objects(int argc,
47904841
struct strvec rp = STRVEC_INIT;
47914842
int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0;
47924843
int rev_list_index = 0;
4793-
int stdin_packs = 0;
4844+
enum stdin_packs_mode stdin_packs = STDIN_PACKS_MODE_NONE;
47944845
struct string_list keep_pack_list = STRING_LIST_INIT_NODUP;
47954846
struct list_objects_filter_options filter_options =
47964847
LIST_OBJECTS_FILTER_INIT;
@@ -4845,6 +4896,9 @@ int cmd_pack_objects(int argc,
48454896
OPT_SET_INT_F(0, "indexed-objects", &rev_list_index,
48464897
N_("include objects referred to by the index"),
48474898
1, PARSE_OPT_NONEG),
4899+
OPT_CALLBACK_F(0, "stdin-packs", &stdin_packs, N_("mode"),
4900+
N_("read packs from stdin"),
4901+
PARSE_OPT_OPTARG, parse_stdin_packs_mode),
48484902
OPT_BOOL(0, "stdin-packs", &stdin_packs,
48494903
N_("read packs from stdin")),
48504904
OPT_BOOL(0, "stdout", &pack_to_stdout,
@@ -5010,9 +5064,10 @@ int cmd_pack_objects(int argc,
50105064
strvec_push(&rp, "--unpacked");
50115065
}
50125066

5013-
if (exclude_promisor_objects && exclude_promisor_objects_best_effort)
5014-
die(_("options '%s' and '%s' cannot be used together"),
5015-
"--exclude-promisor-objects", "--exclude-promisor-objects-best-effort");
5067+
die_for_incompatible_opt2(exclude_promisor_objects,
5068+
"--exclude-promisor-objects",
5069+
exclude_promisor_objects_best_effort,
5070+
"--exclude-promisor-objects-best-effort");
50165071
if (exclude_promisor_objects) {
50175072
use_internal_rev_list = 1;
50185073
fetch_if_missing = 0;
@@ -5050,22 +5105,23 @@ int cmd_pack_objects(int argc,
50505105
if (!pack_to_stdout && thin)
50515106
die(_("--thin cannot be used to build an indexable pack"));
50525107

5053-
if (keep_unreachable && unpack_unreachable)
5054-
die(_("options '%s' and '%s' cannot be used together"), "--keep-unreachable", "--unpack-unreachable");
5108+
die_for_incompatible_opt2(keep_unreachable, "--keep-unreachable",
5109+
unpack_unreachable, "--unpack-unreachable");
50555110
if (!rev_list_all || !rev_list_reflog || !rev_list_index)
50565111
unpack_unreachable_expiration = 0;
50575112

5058-
if (stdin_packs && filter_options.choice)
5059-
die(_("cannot use --filter with --stdin-packs"));
5113+
die_for_incompatible_opt2(stdin_packs, "--stdin-packs",
5114+
filter_options.choice, "--filter");
5115+
50605116

50615117
if (stdin_packs && use_internal_rev_list)
50625118
die(_("cannot use internal rev list with --stdin-packs"));
50635119

50645120
if (cruft) {
50655121
if (use_internal_rev_list)
50665122
die(_("cannot use internal rev list with --cruft"));
5067-
if (stdin_packs)
5068-
die(_("cannot use --stdin-packs with --cruft"));
5123+
die_for_incompatible_opt2(stdin_packs, "--stdin-packs",
5124+
cruft, "--cruft");
50695125
}
50705126

50715127
/*
@@ -5133,11 +5189,7 @@ int cmd_pack_objects(int argc,
51335189
progress_state = start_progress(the_repository,
51345190
_("Enumerating objects"), 0);
51355191
if (stdin_packs) {
5136-
/* avoids adding objects in excluded packs */
5137-
ignore_packed_keep_in_core = 1;
5138-
read_packs_list_from_stdin();
5139-
if (rev_list_unpacked)
5140-
add_unreachable_loose_objects();
5192+
read_stdin_packs(stdin_packs, rev_list_unpacked);
51415193
} else if (cruft) {
51425194
read_cruft_objects();
51435195
} else if (!use_internal_rev_list) {

0 commit comments

Comments
 (0)