Skip to content

Commit 205493d

Browse files
committed
Merge branch 'tb/midx-avoid-cruft-packs'
"pack-objects" has been taught to avoid pointing into objects in cruft packs from midx. * tb/midx-avoid-cruft-packs: repack: exclude cruft pack(s) from the MIDX where possible pack-objects: introduce '--stdin-packs=follow' pack-objects: swap 'show_{object,commit}_pack_hint' pack-objects: fix typo in 'show_object_pack_hint()' pack-objects: perform name-hash traversal for unpacked objects pack-objects: declare 'rev_info' for '--stdin-packs' earlier pack-objects: factor out handling '--stdin-packs' pack-objects: limit scope in 'add_object_entry_from_pack()' pack-objects: use standard option incompatibility functions
2 parents a636d39 + 5ee86c2 commit 205493d

File tree

6 files changed

+571
-89
lines changed

6 files changed

+571
-89
lines changed

Documentation/config/repack.adoc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,10 @@ repack.cruftThreads::
3939
a cruft pack and the respective parameters are not given over
4040
the command line. See similarly named `pack.*` configuration
4141
variables for defaults and meaning.
42+
43+
repack.midxMustContainCruft::
44+
When set to true, linkgit:git-repack[1] will unconditionally include
45+
cruft pack(s), if any, in the multi-pack index when invoked with
46+
`--write-midx`. When false, cruft packs are only included in the MIDX
47+
when necessary (e.g., because they might be required to form a
48+
reachability closure with MIDX bitmaps). Defaults to true.

Documentation/git-pack-objects.adoc

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,13 +87,21 @@ base-name::
8787
reference was included in the resulting packfile. This
8888
can be useful to send new tags to native Git clients.
8989

90-
--stdin-packs::
90+
--stdin-packs[=<mode>]::
9191
Read the basenames of packfiles (e.g., `pack-1234abcd.pack`)
9292
from the standard input, instead of object names or revision
9393
arguments. The resulting pack contains all objects listed in the
9494
included packs (those not beginning with `^`), excluding any
9595
objects listed in the excluded packs (beginning with `^`).
9696
+
97+
When `mode` is "follow", objects from packs not listed on stdin receive
98+
special treatment. Objects within unlisted packs will be included if
99+
those objects are (1) reachable from the included packs, and (2) not
100+
found in any excluded packs. This mode is useful, for example, to
101+
resurrect once-unreachable objects found in cruft packs to generate
102+
packs which are closed under reachability up to the boundary set by the
103+
excluded packs.
104+
+
97105
Incompatible with `--revs`, or options that imply `--revs` (such as
98106
`--all`), with the exception of `--unpacked`, which is compatible.
99107

builtin/pack-objects.c

Lines changed: 122 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,12 @@ static struct oidmap configured_exclusions;
284284
static struct oidset excluded_by_config;
285285
static int name_hash_version = -1;
286286

287+
enum stdin_packs_mode {
288+
STDIN_PACKS_MODE_NONE,
289+
STDIN_PACKS_MODE_STANDARD,
290+
STDIN_PACKS_MODE_FOLLOW,
291+
};
292+
287293
/**
288294
* Check whether the name_hash_version chosen by user input is appropriate,
289295
* and also validate whether it is compatible with other features.
@@ -3727,14 +3733,14 @@ static int add_object_entry_from_pack(const struct object_id *oid,
37273733
return 0;
37283734

37293735
if (p) {
3730-
struct rev_info *revs = _data;
37313736
struct object_info oi = OBJECT_INFO_INIT;
37323737

37333738
oi.typep = &type;
37343739
if (packed_object_info(the_repository, p, ofs, &oi) < 0) {
37353740
die(_("could not get type of object %s in pack %s"),
37363741
oid_to_hex(oid), p->pack_name);
37373742
} else if (type == OBJ_COMMIT) {
3743+
struct rev_info *revs = _data;
37383744
/*
37393745
* commits in included packs are used as starting points for the
37403746
* subsequent revision walk
@@ -3750,32 +3756,48 @@ static int add_object_entry_from_pack(const struct object_id *oid,
37503756
return 0;
37513757
}
37523758

3753-
static void show_commit_pack_hint(struct commit *commit UNUSED,
3754-
void *data UNUSED)
3759+
static void show_object_pack_hint(struct object *object, const char *name,
3760+
void *data)
37553761
{
3756-
/* nothing to do; commits don't have a namehash */
3762+
enum stdin_packs_mode mode = *(enum stdin_packs_mode *)data;
3763+
if (mode == STDIN_PACKS_MODE_FOLLOW) {
3764+
if (object->type == OBJ_BLOB &&
3765+
!has_object(the_repository, &object->oid, 0))
3766+
return;
3767+
add_object_entry(&object->oid, object->type, name, 0);
3768+
} else {
3769+
struct object_entry *oe = packlist_find(&to_pack, &object->oid);
3770+
if (!oe)
3771+
return;
3772+
3773+
/*
3774+
* Our 'to_pack' list was constructed by iterating all
3775+
* objects packed in included packs, and so doesn't have
3776+
* a non-zero hash field that you would typically pick
3777+
* up during a reachability traversal.
3778+
*
3779+
* Make a best-effort attempt to fill in the ->hash and
3780+
* ->no_try_delta fields here in order to perhaps
3781+
* improve the delta selection process.
3782+
*/
3783+
oe->hash = pack_name_hash_fn(name);
3784+
oe->no_try_delta = name && no_try_delta(name);
3785+
3786+
stdin_packs_hints_nr++;
3787+
}
37573788
}
37583789

3759-
static void show_object_pack_hint(struct object *object, const char *name,
3760-
void *data UNUSED)
3790+
static void show_commit_pack_hint(struct commit *commit, void *data)
37613791
{
3762-
struct object_entry *oe = packlist_find(&to_pack, &object->oid);
3763-
if (!oe)
3792+
enum stdin_packs_mode mode = *(enum stdin_packs_mode *)data;
3793+
3794+
if (mode == STDIN_PACKS_MODE_FOLLOW) {
3795+
show_object_pack_hint((struct object *)commit, "", data);
37643796
return;
3797+
}
37653798

3766-
/*
3767-
* Our 'to_pack' list was constructed by iterating all objects packed in
3768-
* included packs, and so doesn't have a non-zero hash field that you
3769-
* would typically pick up during a reachability traversal.
3770-
*
3771-
* Make a best-effort attempt to fill in the ->hash and ->no_try_delta
3772-
* here using a now in order to perhaps improve the delta selection
3773-
* process.
3774-
*/
3775-
oe->hash = pack_name_hash_fn(name);
3776-
oe->no_try_delta = name && no_try_delta(name);
3799+
/* nothing to do; commits don't have a namehash */
37773800

3778-
stdin_packs_hints_nr++;
37793801
}
37803802

37813803
static int pack_mtime_cmp(const void *_a, const void *_b)
@@ -3795,32 +3817,14 @@ static int pack_mtime_cmp(const void *_a, const void *_b)
37953817
return 0;
37963818
}
37973819

3798-
static void read_packs_list_from_stdin(void)
3820+
static void read_packs_list_from_stdin(struct rev_info *revs)
37993821
{
38003822
struct strbuf buf = STRBUF_INIT;
38013823
struct string_list include_packs = STRING_LIST_INIT_DUP;
38023824
struct string_list exclude_packs = STRING_LIST_INIT_DUP;
38033825
struct string_list_item *item = NULL;
38043826

38053827
struct packed_git *p;
3806-
struct rev_info revs;
3807-
3808-
repo_init_revisions(the_repository, &revs, NULL);
3809-
/*
3810-
* Use a revision walk to fill in the namehash of objects in the include
3811-
* packs. To save time, we'll avoid traversing through objects that are
3812-
* in excluded packs.
3813-
*
3814-
* That may cause us to avoid populating all of the namehash fields of
3815-
* all included objects, but our goal is best-effort, since this is only
3816-
* an optimization during delta selection.
3817-
*/
3818-
revs.no_kept_objects = 1;
3819-
revs.keep_pack_cache_flags |= IN_CORE_KEEP_PACKS;
3820-
revs.blob_objects = 1;
3821-
revs.tree_objects = 1;
3822-
revs.tag_objects = 1;
3823-
revs.ignore_missing_links = 1;
38243828

38253829
while (strbuf_getline(&buf, stdin) != EOF) {
38263830
if (!buf.len)
@@ -3890,25 +3894,55 @@ static void read_packs_list_from_stdin(void)
38903894
struct packed_git *p = item->util;
38913895
for_each_object_in_pack(p,
38923896
add_object_entry_from_pack,
3893-
&revs,
3897+
revs,
38943898
FOR_EACH_OBJECT_PACK_ORDER);
38953899
}
38963900

3901+
strbuf_release(&buf);
3902+
string_list_clear(&include_packs, 0);
3903+
string_list_clear(&exclude_packs, 0);
3904+
}
3905+
3906+
static void add_unreachable_loose_objects(struct rev_info *revs);
3907+
3908+
static void read_stdin_packs(enum stdin_packs_mode mode, int rev_list_unpacked)
3909+
{
3910+
struct rev_info revs;
3911+
3912+
repo_init_revisions(the_repository, &revs, NULL);
3913+
/*
3914+
* Use a revision walk to fill in the namehash of objects in the include
3915+
* packs. To save time, we'll avoid traversing through objects that are
3916+
* in excluded packs.
3917+
*
3918+
* That may cause us to avoid populating all of the namehash fields of
3919+
* all included objects, but our goal is best-effort, since this is only
3920+
* an optimization during delta selection.
3921+
*/
3922+
revs.no_kept_objects = 1;
3923+
revs.keep_pack_cache_flags |= IN_CORE_KEEP_PACKS;
3924+
revs.blob_objects = 1;
3925+
revs.tree_objects = 1;
3926+
revs.tag_objects = 1;
3927+
revs.ignore_missing_links = 1;
3928+
3929+
/* avoids adding objects in excluded packs */
3930+
ignore_packed_keep_in_core = 1;
3931+
read_packs_list_from_stdin(&revs);
3932+
if (rev_list_unpacked)
3933+
add_unreachable_loose_objects(&revs);
3934+
38973935
if (prepare_revision_walk(&revs))
38983936
die(_("revision walk setup failed"));
38993937
traverse_commit_list(&revs,
39003938
show_commit_pack_hint,
39013939
show_object_pack_hint,
3902-
NULL);
3940+
&mode);
39033941

39043942
trace2_data_intmax("pack-objects", the_repository, "stdin_packs_found",
39053943
stdin_packs_found_nr);
39063944
trace2_data_intmax("pack-objects", the_repository, "stdin_packs_hints",
39073945
stdin_packs_hints_nr);
3908-
3909-
strbuf_release(&buf);
3910-
string_list_clear(&include_packs, 0);
3911-
string_list_clear(&exclude_packs, 0);
39123946
}
39133947

39143948
static void add_cruft_object_entry(const struct object_id *oid, enum object_type type,
@@ -4006,7 +4040,6 @@ static void mark_pack_kept_in_core(struct string_list *packs, unsigned keep)
40064040
}
40074041
}
40084042

4009-
static void add_unreachable_loose_objects(void);
40104043
static void add_objects_in_unpacked_packs(void);
40114044

40124045
static void enumerate_cruft_objects(void)
@@ -4016,7 +4049,7 @@ static void enumerate_cruft_objects(void)
40164049
_("Enumerating cruft objects"), 0);
40174050

40184051
add_objects_in_unpacked_packs();
4019-
add_unreachable_loose_objects();
4052+
add_unreachable_loose_objects(NULL);
40204053

40214054
stop_progress(&progress_state);
40224055
}
@@ -4294,8 +4327,9 @@ static void add_objects_in_unpacked_packs(void)
42944327
}
42954328

42964329
static int add_loose_object(const struct object_id *oid, const char *path,
4297-
void *data UNUSED)
4330+
void *data)
42984331
{
4332+
struct rev_info *revs = data;
42994333
enum object_type type = odb_read_object_info(the_repository->objects, oid, NULL);
43004334

43014335
if (type < 0) {
@@ -4316,6 +4350,10 @@ static int add_loose_object(const struct object_id *oid, const char *path,
43164350
} else {
43174351
add_object_entry(oid, type, "", 0);
43184352
}
4353+
4354+
if (revs && type == OBJ_COMMIT)
4355+
add_pending_oid(revs, NULL, oid, 0);
4356+
43194357
return 0;
43204358
}
43214359

@@ -4324,11 +4362,10 @@ static int add_loose_object(const struct object_id *oid, const char *path,
43244362
* add_object_entry will weed out duplicates, so we just add every
43254363
* loose object we find.
43264364
*/
4327-
static void add_unreachable_loose_objects(void)
4365+
static void add_unreachable_loose_objects(struct rev_info *revs)
43284366
{
43294367
for_each_loose_file_in_objdir(repo_get_object_directory(the_repository),
4330-
add_loose_object,
4331-
NULL, NULL, NULL);
4368+
add_loose_object, NULL, NULL, revs);
43324369
}
43334370

43344371
static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid)
@@ -4675,7 +4712,7 @@ static void get_object_list(struct rev_info *revs, int ac, const char **av)
46754712
if (keep_unreachable)
46764713
add_objects_in_unpacked_packs();
46774714
if (pack_loose_unreachable)
4678-
add_unreachable_loose_objects();
4715+
add_unreachable_loose_objects(NULL);
46794716
if (unpack_unreachable)
46804717
loosen_unused_packed_objects();
46814718

@@ -4782,6 +4819,23 @@ static int is_not_in_promisor_pack(struct commit *commit, void *data) {
47824819
return is_not_in_promisor_pack_obj((struct object *) commit, data);
47834820
}
47844821

4822+
static int parse_stdin_packs_mode(const struct option *opt, const char *arg,
4823+
int unset)
4824+
{
4825+
enum stdin_packs_mode *mode = opt->value;
4826+
4827+
if (unset)
4828+
*mode = STDIN_PACKS_MODE_NONE;
4829+
else if (!arg || !*arg)
4830+
*mode = STDIN_PACKS_MODE_STANDARD;
4831+
else if (!strcmp(arg, "follow"))
4832+
*mode = STDIN_PACKS_MODE_FOLLOW;
4833+
else
4834+
die(_("invalid value for '%s': '%s'"), opt->long_name, arg);
4835+
4836+
return 0;
4837+
}
4838+
47854839
int cmd_pack_objects(int argc,
47864840
const char **argv,
47874841
const char *prefix,
@@ -4792,7 +4846,7 @@ int cmd_pack_objects(int argc,
47924846
struct strvec rp = STRVEC_INIT;
47934847
int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0;
47944848
int rev_list_index = 0;
4795-
int stdin_packs = 0;
4849+
enum stdin_packs_mode stdin_packs = STDIN_PACKS_MODE_NONE;
47964850
struct string_list keep_pack_list = STRING_LIST_INIT_NODUP;
47974851
struct list_objects_filter_options filter_options =
47984852
LIST_OBJECTS_FILTER_INIT;
@@ -4847,6 +4901,9 @@ int cmd_pack_objects(int argc,
48474901
OPT_SET_INT_F(0, "indexed-objects", &rev_list_index,
48484902
N_("include objects referred to by the index"),
48494903
1, PARSE_OPT_NONEG),
4904+
OPT_CALLBACK_F(0, "stdin-packs", &stdin_packs, N_("mode"),
4905+
N_("read packs from stdin"),
4906+
PARSE_OPT_OPTARG, parse_stdin_packs_mode),
48504907
OPT_BOOL(0, "stdin-packs", &stdin_packs,
48514908
N_("read packs from stdin")),
48524909
OPT_BOOL(0, "stdout", &pack_to_stdout,
@@ -5012,9 +5069,10 @@ int cmd_pack_objects(int argc,
50125069
strvec_push(&rp, "--unpacked");
50135070
}
50145071

5015-
if (exclude_promisor_objects && exclude_promisor_objects_best_effort)
5016-
die(_("options '%s' and '%s' cannot be used together"),
5017-
"--exclude-promisor-objects", "--exclude-promisor-objects-best-effort");
5072+
die_for_incompatible_opt2(exclude_promisor_objects,
5073+
"--exclude-promisor-objects",
5074+
exclude_promisor_objects_best_effort,
5075+
"--exclude-promisor-objects-best-effort");
50185076
if (exclude_promisor_objects) {
50195077
use_internal_rev_list = 1;
50205078
fetch_if_missing = 0;
@@ -5052,22 +5110,23 @@ int cmd_pack_objects(int argc,
50525110
if (!pack_to_stdout && thin)
50535111
die(_("--thin cannot be used to build an indexable pack"));
50545112

5055-
if (keep_unreachable && unpack_unreachable)
5056-
die(_("options '%s' and '%s' cannot be used together"), "--keep-unreachable", "--unpack-unreachable");
5113+
die_for_incompatible_opt2(keep_unreachable, "--keep-unreachable",
5114+
unpack_unreachable, "--unpack-unreachable");
50575115
if (!rev_list_all || !rev_list_reflog || !rev_list_index)
50585116
unpack_unreachable_expiration = 0;
50595117

5060-
if (stdin_packs && filter_options.choice)
5061-
die(_("cannot use --filter with --stdin-packs"));
5118+
die_for_incompatible_opt2(stdin_packs, "--stdin-packs",
5119+
filter_options.choice, "--filter");
5120+
50625121

50635122
if (stdin_packs && use_internal_rev_list)
50645123
die(_("cannot use internal rev list with --stdin-packs"));
50655124

50665125
if (cruft) {
50675126
if (use_internal_rev_list)
50685127
die(_("cannot use internal rev list with --cruft"));
5069-
if (stdin_packs)
5070-
die(_("cannot use --stdin-packs with --cruft"));
5128+
die_for_incompatible_opt2(stdin_packs, "--stdin-packs",
5129+
cruft, "--cruft");
50715130
}
50725131

50735132
/*
@@ -5135,11 +5194,7 @@ int cmd_pack_objects(int argc,
51355194
progress_state = start_progress(the_repository,
51365195
_("Enumerating objects"), 0);
51375196
if (stdin_packs) {
5138-
/* avoids adding objects in excluded packs */
5139-
ignore_packed_keep_in_core = 1;
5140-
read_packs_list_from_stdin();
5141-
if (rev_list_unpacked)
5142-
add_unreachable_loose_objects();
5197+
read_stdin_packs(stdin_packs, rev_list_unpacked);
51435198
} else if (cruft) {
51445199
read_cruft_objects();
51455200
} else if (!use_internal_rev_list) {

0 commit comments

Comments
 (0)