Skip to content

Commit 9218c6a

Browse files
ttaylorr authored and gitster committed
midx: allow marking a pack as preferred
When multiple packs in the multi-pack index contain the same object, the MIDX machinery must make a choice about which pack it associates with that object. Prior to this patch, the lowest-ordered[1] pack was always selected. Pack selection for duplicate objects is relatively unimportant today, but it will become important for multi-pack bitmaps. This is because we can only invoke the pack-reuse mechanism when all of the bits for reused objects come from the reuse pack (in order to ensure that all reused deltas can find their base objects in the same pack). To encourage the pack selection process to prefer one pack over another (the pack to be preferred is the one a caller would like to later use as a reuse pack), introduce the concept of a "preferred pack". When provided, the MIDX code will always prefer an object found in a preferred pack over any other. No format changes are required to store the preferred pack, since it will be able to be inferred with a corresponding MIDX bitmap, by looking up the pack associated with the object in the first bit position (this ordering is described in detail in a subsequent commit). [1]: the ordering is specified by MIDX internals; for our purposes we can consider the "lowest ordered" pack to be "the one with the most-recent mtime". Signed-off-by: Taylor Blau <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 86d174b commit 9218c6a

File tree

7 files changed

+148
-18
lines changed

7 files changed

+148
-18
lines changed

Documentation/git-multi-pack-index.txt

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@ git-multi-pack-index - Write and verify multi-pack-indexes
99
SYNOPSIS
1010
--------
1111
[verse]
12-
'git multi-pack-index' [--object-dir=<dir>] [--[no-]progress] <subcommand>
12+
'git multi-pack-index' [--object-dir=<dir>] [--[no-]progress]
13+
[--preferred-pack=<pack>] <subcommand>
1314

1415
DESCRIPTION
1516
-----------
@@ -30,7 +31,16 @@ OPTIONS
3031
The following subcommands are available:
3132

3233
write::
33-
Write a new MIDX file.
34+
Write a new MIDX file. The following options are available for
35+
the `write` sub-command:
36+
+
37+
--
38+
--preferred-pack=<pack>::
39+
Optionally specify the tie-breaking pack used when
40+
multiple packs contain the same object. If not given,
41+
ties are broken in favor of the pack with the most-recent
42+
mtime.
43+
--
3444

3545
verify::
3646
Verify the contents of the MIDX file.

Documentation/technical/multi-pack-index.txt

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,9 @@ Design Details
4343
a change in format.
4444

4545
- The MIDX keeps only one record per object ID. If an object appears
46-
in multiple packfiles, then the MIDX selects the copy in the most-
47-
recently modified packfile.
46+
in multiple packfiles, then the MIDX selects the copy in the
47+
preferred packfile, otherwise selecting from the most-recently
48+
modified packfile.
4849

4950
- If there exist packfiles in the pack directory not registered in
5051
the MIDX, then those packfiles are loaded into the `packed_git`

builtin/multi-pack-index.c

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,10 @@
44
#include "parse-options.h"
55
#include "midx.h"
66
#include "trace2.h"
7+
#include "object-store.h"
78

89
#define BUILTIN_MIDX_WRITE_USAGE \
9-
N_("git multi-pack-index [<options>] write")
10+
N_("git multi-pack-index [<options>] write [--preferred-pack=<pack>]")
1011

1112
#define BUILTIN_MIDX_VERIFY_USAGE \
1213
N_("git multi-pack-index [<options>] verify")
@@ -43,6 +44,7 @@ static char const * const builtin_multi_pack_index_usage[] = {
4344

4445
static struct opts_multi_pack_index {
4546
const char *object_dir;
47+
const char *preferred_pack;
4648
unsigned long batch_size;
4749
unsigned flags;
4850
} opts;
@@ -61,7 +63,15 @@ static struct option *add_common_options(struct option *prev)
6163

6264
static int cmd_multi_pack_index_write(int argc, const char **argv)
6365
{
64-
struct option *options = common_opts;
66+
struct option *options;
67+
static struct option builtin_multi_pack_index_write_options[] = {
68+
OPT_STRING(0, "preferred-pack", &opts.preferred_pack,
69+
N_("preferred-pack"),
70+
N_("pack for reuse when computing a multi-pack bitmap")),
71+
OPT_END(),
72+
};
73+
74+
options = add_common_options(builtin_multi_pack_index_write_options);
6575

6676
trace2_cmd_mode(argv[0]);
6777

@@ -72,7 +82,10 @@ static int cmd_multi_pack_index_write(int argc, const char **argv)
7282
usage_with_options(builtin_multi_pack_index_write_usage,
7383
options);
7484

75-
return write_midx_file(opts.object_dir, opts.flags);
85+
FREE_AND_NULL(options);
86+
87+
return write_midx_file(opts.object_dir, opts.preferred_pack,
88+
opts.flags);
7689
}
7790

7891
static int cmd_multi_pack_index_verify(int argc, const char **argv)

builtin/repack.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -523,7 +523,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
523523
remove_temporary_files();
524524

525525
if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX, 0))
526-
write_midx_file(get_object_directory(), 0);
526+
write_midx_file(get_object_directory(), NULL, 0);
527527

528528
string_list_clear(&names, 0);
529529
string_list_clear(&rollback, 0);

midx.c

Lines changed: 73 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -431,6 +431,14 @@ static int pack_info_compare(const void *_a, const void *_b)
431431
return strcmp(a->pack_name, b->pack_name);
432432
}
433433

434+
/*
 * Comparison callback with the (key, element) argument order that
 * bsearch() uses: `_va` is the search key, read as a C string holding a
 * pack name, and `_vb` is an element of a `struct pack_info` array.
 *
 * Returns the result of cmp_idx_or_pack_name() applied to the key and the
 * element's `pack_name`. Judging by that helper's name, the key may be
 * given as either a pack or an idx file name -- confirm against its
 * definition, which is not shown here.
 */
static int idx_or_pack_name_cmp(const void *_va, const void *_vb)
435+
{
436+
const char *pack_name = _va;
437+
const struct pack_info *compar = _vb;
438+
439+
return cmp_idx_or_pack_name(pack_name, compar->pack_name);
440+
}
441+
434442
struct write_midx_context {
435443
struct pack_info *info;
436444
uint32_t nr;
@@ -445,6 +453,8 @@ struct write_midx_context {
445453
uint32_t *pack_perm;
446454
unsigned large_offsets_needed:1;
447455
uint32_t num_large_offsets;
456+
457+
int preferred_pack_idx;
448458
};
449459

450460
static void add_pack_to_midx(const char *full_path, size_t full_path_len,
@@ -489,6 +499,7 @@ struct pack_midx_entry {
489499
uint32_t pack_int_id;
490500
time_t pack_mtime;
491501
uint64_t offset;
502+
unsigned preferred : 1;
492503
};
493504

494505
static int midx_oid_compare(const void *_a, const void *_b)
@@ -500,6 +511,12 @@ static int midx_oid_compare(const void *_a, const void *_b)
500511
if (cmp)
501512
return cmp;
502513

514+
/* Sort objects in a preferred pack first when multiple copies exist. */
515+
if (a->preferred > b->preferred)
516+
return -1;
517+
if (a->preferred < b->preferred)
518+
return 1;
519+
503520
if (a->pack_mtime > b->pack_mtime)
504521
return -1;
505522
else if (a->pack_mtime < b->pack_mtime)
@@ -527,7 +544,8 @@ static int nth_midxed_pack_midx_entry(struct multi_pack_index *m,
527544
static void fill_pack_entry(uint32_t pack_int_id,
528545
struct packed_git *p,
529546
uint32_t cur_object,
530-
struct pack_midx_entry *entry)
547+
struct pack_midx_entry *entry,
548+
int preferred)
531549
{
532550
if (nth_packed_object_id(&entry->oid, p, cur_object) < 0)
533551
die(_("failed to locate object %d in packfile"), cur_object);
@@ -536,6 +554,7 @@ static void fill_pack_entry(uint32_t pack_int_id,
536554
entry->pack_mtime = p->mtime;
537555

538556
entry->offset = nth_packed_object_offset(p, cur_object);
557+
entry->preferred = !!preferred;
539558
}
540559

541560
/*
@@ -552,7 +571,8 @@ static void fill_pack_entry(uint32_t pack_int_id,
552571
static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
553572
struct pack_info *info,
554573
uint32_t nr_packs,
555-
uint32_t *nr_objects)
574+
uint32_t *nr_objects,
575+
int preferred_pack)
556576
{
557577
uint32_t cur_fanout, cur_pack, cur_object;
558578
uint32_t alloc_fanout, alloc_objects, total_objects = 0;
@@ -589,20 +609,29 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
589609
nth_midxed_pack_midx_entry(m,
590610
&entries_by_fanout[nr_fanout],
591611
cur_object);
612+
if (nth_midxed_pack_int_id(m, cur_object) == preferred_pack)
613+
entries_by_fanout[nr_fanout].preferred = 1;
614+
else
615+
entries_by_fanout[nr_fanout].preferred = 0;
592616
nr_fanout++;
593617
}
594618
}
595619

596620
for (cur_pack = start_pack; cur_pack < nr_packs; cur_pack++) {
597621
uint32_t start = 0, end;
622+
int preferred = cur_pack == preferred_pack;
598623

599624
if (cur_fanout)
600625
start = get_pack_fanout(info[cur_pack].p, cur_fanout - 1);
601626
end = get_pack_fanout(info[cur_pack].p, cur_fanout);
602627

603628
for (cur_object = start; cur_object < end; cur_object++) {
604629
ALLOC_GROW(entries_by_fanout, nr_fanout + 1, alloc_fanout);
605-
fill_pack_entry(cur_pack, info[cur_pack].p, cur_object, &entries_by_fanout[nr_fanout]);
630+
fill_pack_entry(cur_pack,
631+
info[cur_pack].p,
632+
cur_object,
633+
&entries_by_fanout[nr_fanout],
634+
preferred);
606635
nr_fanout++;
607636
}
608637
}
@@ -777,7 +806,9 @@ static int write_midx_large_offsets(struct hashfile *f,
777806
}
778807

779808
static int write_midx_internal(const char *object_dir, struct multi_pack_index *m,
780-
struct string_list *packs_to_drop, unsigned flags)
809+
struct string_list *packs_to_drop,
810+
const char *preferred_pack_name,
811+
unsigned flags)
781812
{
782813
char *midx_name;
783814
uint32_t i;
@@ -828,7 +859,19 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index *
828859
if (ctx.m && ctx.nr == ctx.m->num_packs && !packs_to_drop)
829860
goto cleanup;
830861

831-
ctx.entries = get_sorted_entries(ctx.m, ctx.info, ctx.nr, &ctx.entries_nr);
862+
ctx.preferred_pack_idx = -1;
863+
if (preferred_pack_name) {
864+
for (i = 0; i < ctx.nr; i++) {
865+
if (!cmp_idx_or_pack_name(preferred_pack_name,
866+
ctx.info[i].pack_name)) {
867+
ctx.preferred_pack_idx = i;
868+
break;
869+
}
870+
}
871+
}
872+
873+
ctx.entries = get_sorted_entries(ctx.m, ctx.info, ctx.nr, &ctx.entries_nr,
874+
ctx.preferred_pack_idx);
832875

833876
ctx.large_offsets_needed = 0;
834877
for (i = 0; i < ctx.entries_nr; i++) {
@@ -889,6 +932,24 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index *
889932
pack_name_concat_len += strlen(ctx.info[i].pack_name) + 1;
890933
}
891934

935+
/* Check that the preferred pack wasn't expired (if given). */
936+
if (preferred_pack_name) {
937+
struct pack_info *preferred = bsearch(preferred_pack_name,
938+
ctx.info, ctx.nr,
939+
sizeof(*ctx.info),
940+
idx_or_pack_name_cmp);
941+
942+
if (!preferred)
943+
warning(_("unknown preferred pack: '%s'"),
944+
preferred_pack_name);
945+
else {
946+
uint32_t perm = ctx.pack_perm[preferred->orig_pack_int_id];
947+
if (perm == PACK_EXPIRED)
948+
warning(_("preferred pack '%s' is expired"),
949+
preferred_pack_name);
950+
}
951+
}
952+
892953
if (pack_name_concat_len % MIDX_CHUNK_ALIGNMENT)
893954
pack_name_concat_len += MIDX_CHUNK_ALIGNMENT -
894955
(pack_name_concat_len % MIDX_CHUNK_ALIGNMENT);
@@ -947,9 +1008,12 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index *
9471008
return result;
9481009
}
9491010

950-
int write_midx_file(const char *object_dir, unsigned flags)
1011+
int write_midx_file(const char *object_dir,
1012+
const char *preferred_pack_name,
1013+
unsigned flags)
9511014
{
952-
return write_midx_internal(object_dir, NULL, NULL, flags);
1015+
return write_midx_internal(object_dir, NULL, NULL, preferred_pack_name,
1016+
flags);
9531017
}
9541018

9551019
void clear_midx_file(struct repository *r)
@@ -1184,7 +1248,7 @@ int expire_midx_packs(struct repository *r, const char *object_dir, unsigned fla
11841248
free(count);
11851249

11861250
if (packs_to_drop.nr)
1187-
result = write_midx_internal(object_dir, m, &packs_to_drop, flags);
1251+
result = write_midx_internal(object_dir, m, &packs_to_drop, NULL, flags);
11881252

11891253
string_list_clear(&packs_to_drop, 0);
11901254
return result;
@@ -1373,7 +1437,7 @@ int midx_repack(struct repository *r, const char *object_dir, size_t batch_size,
13731437
goto cleanup;
13741438
}
13751439

1376-
result = write_midx_internal(object_dir, m, NULL, flags);
1440+
result = write_midx_internal(object_dir, m, NULL, NULL, flags);
13771441
m = NULL;
13781442

13791443
cleanup:

midx.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ int fill_midx_entry(struct repository *r, const struct object_id *oid, struct pa
4747
int midx_contains_pack(struct multi_pack_index *m, const char *idx_or_pack_name);
4848
int prepare_multi_pack_index_one(struct repository *r, const char *object_dir, int local);
4949

50-
int write_midx_file(const char *object_dir, unsigned flags);
50+
int write_midx_file(const char *object_dir, const char *preferred_pack_name, unsigned flags);
5151
void clear_midx_file(struct repository *r);
5252
int verify_midx_file(struct repository *r, const char *object_dir, unsigned flags);
5353
int expire_midx_packs(struct repository *r, const char *object_dir, unsigned flags);

t/t5319-multi-pack-index.sh

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,48 @@ test_expect_success 'warn on improper hash version' '
234234
)
235235
'
236236

237+
test_expect_success 'midx picks objects from preferred pack' '
238+
test_when_finished rm -rf preferred.git &&
239+
git init --bare preferred.git &&
240+
(
241+
cd preferred.git &&
242+
243+
a=$(echo "a" | git hash-object -w --stdin) &&
244+
b=$(echo "b" | git hash-object -w --stdin) &&
245+
c=$(echo "c" | git hash-object -w --stdin) &&
246+
247+
# Set up two packs, duplicating the object "B" at different
248+
# offsets.
249+
#
250+
# Note that the "BC" pack (the one we choose as preferred) sorts
251+
# lexically after the "AB" pack, meaning that omitting the
252+
# --preferred-pack argument would cause this test to fail (since
253+
# the MIDX code would select the copy of "b" in the "AB" pack).
254+
git pack-objects objects/pack/test-AB <<-EOF &&
255+
$a
256+
$b
257+
EOF
258+
bc=$(git pack-objects objects/pack/test-BC <<-EOF
259+
$b
260+
$c
261+
EOF
262+
) &&
263+
264+
git multi-pack-index --object-dir=objects \
265+
write --preferred-pack=test-BC-$bc.idx 2>err &&
266+
test_must_be_empty err &&
267+
268+
test-tool read-midx --show-objects objects >out &&
269+
270+
ofs=$(git show-index <objects/pack/test-BC-$bc.idx | grep $b |
271+
cut -d" " -f1) &&
272+
printf "%s %s\tobjects/pack/test-BC-%s.pack\n" \
273+
"$b" "$ofs" "$bc" >expect &&
274+
grep ^$b out >actual &&
275+
276+
test_cmp expect actual
277+
)
278+
'
237279

238280
test_expect_success 'verify multi-pack-index success' '
239281
git multi-pack-index verify --object-dir=$objdir

0 commit comments

Comments
 (0)