Skip to content

Commit f2cb46a

Browse files
committed
Merge branch 'tb/midx-bitmap-corruption-fix'
A bug that made multi-pack bitmap and the object order out-of-sync, making the .midx data corrupt, has been fixed. * tb/midx-bitmap-corruption-fix: pack-bitmap.c: gracefully fallback after opening pack/MIDX midx: read `RIDX` chunk when present t/lib-bitmap.sh: parameterize tests over reverse index source t5326: move tests to t/lib-bitmap.sh t5326: extract `test_rev_exists` t5326: drop unnecessary setup pack-revindex.c: instrument loading on-disk reverse index midx.c: make changing the preferred pack safe t5326: demonstrate bitmap corruption after permutation
2 parents 90b7153 + f8b60cf commit f2cb46a

11 files changed

+321
-151
lines changed

Documentation/technical/multi-pack-index.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ and their offsets into multiple packfiles. It contains:
2424
** An offset within the jth packfile for the object.
2525
* If large offsets are required, we use another list of large
2626
offsets similar to version 2 pack-indexes.
27+
- An optional list of objects in pseudo-pack order (used with MIDX bitmaps).
2728

2829
Thus, we can provide O(log N) lookup time for any number
2930
of packfiles.

Documentation/technical/pack-format.txt

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,11 @@ CHUNK DATA:
376376
[Optional] Object Large Offsets (ID: {'L', 'O', 'F', 'F'})
377377
8-byte offsets into large packfiles.
378378

379+
[Optional] Bitmap pack order (ID: {'R', 'I', 'D', 'X'})
380+
A list of MIDX positions (one per object in the MIDX, num_objects in
381+
total, each a 4-byte unsigned integer in network byte order), sorted
382+
according to their relative bitmap/pseudo-pack positions.
383+
379384
TRAILER:
380385

381386
Index checksum of the above contents.
@@ -456,9 +461,5 @@ In short, a MIDX's pseudo-pack is the de-duplicated concatenation of
456461
objects in packs stored by the MIDX, laid out in pack order, and the
457462
packs arranged in MIDX order (with the preferred pack coming first).
458463

459-
Finally, note that the MIDX's reverse index is not stored as a chunk in
460-
the multi-pack-index itself. This is done because the reverse index
461-
includes the checksum of the pack or MIDX to which it belongs, which
462-
makes it impossible to write in the MIDX. To avoid races when rewriting
463-
the MIDX, a MIDX reverse index includes the MIDX's checksum in its
464-
filename (e.g., `multi-pack-index-xyz.rev`).
464+
The MIDX's reverse index is stored in the optional 'RIDX' chunk within
465+
the MIDX itself.

midx.c

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#define MIDX_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */
3434
#define MIDX_CHUNKID_OBJECTOFFSETS 0x4f4f4646 /* "OOFF" */
3535
#define MIDX_CHUNKID_LARGEOFFSETS 0x4c4f4646 /* "LOFF" */
36+
#define MIDX_CHUNKID_REVINDEX 0x52494458 /* "RIDX" */
3637
#define MIDX_CHUNK_FANOUT_SIZE (sizeof(uint32_t) * 256)
3738
#define MIDX_CHUNK_OFFSET_WIDTH (2 * sizeof(uint32_t))
3839
#define MIDX_CHUNK_LARGE_OFFSET_WIDTH (sizeof(uint64_t))
@@ -161,6 +162,9 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local
161162

162163
pair_chunk(cf, MIDX_CHUNKID_LARGEOFFSETS, &m->chunk_large_offsets);
163164

165+
if (git_env_bool("GIT_TEST_MIDX_READ_RIDX", 1))
166+
pair_chunk(cf, MIDX_CHUNKID_REVINDEX, &m->chunk_revindex);
167+
164168
m->num_objects = ntohl(m->chunk_oid_fanout[255]);
165169

166170
CALLOC_ARRAY(m->pack_names, m->num_packs);
@@ -833,6 +837,18 @@ static int write_midx_large_offsets(struct hashfile *f,
833837
return 0;
834838
}
835839

840+
static int write_midx_revindex(struct hashfile *f,
841+
void *data)
842+
{
843+
struct write_midx_context *ctx = data;
844+
uint32_t i;
845+
846+
for (i = 0; i < ctx->entries_nr; i++)
847+
hashwrite_be32(f, ctx->pack_order[i]);
848+
849+
return 0;
850+
}
851+
836852
struct midx_pack_order_data {
837853
uint32_t nr;
838854
uint32_t pack;
@@ -1403,16 +1419,21 @@ static int write_midx_internal(const char *object_dir,
14031419
(size_t)ctx.num_large_offsets * MIDX_CHUNK_LARGE_OFFSET_WIDTH,
14041420
write_midx_large_offsets);
14051421

1422+
if (flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP)) {
1423+
ctx.pack_order = midx_pack_order(&ctx);
1424+
add_chunk(cf, MIDX_CHUNKID_REVINDEX,
1425+
ctx.entries_nr * sizeof(uint32_t),
1426+
write_midx_revindex);
1427+
}
1428+
14061429
write_midx_header(f, get_num_chunks(cf), ctx.nr - dropped_packs);
14071430
write_chunkfile(cf, &ctx);
14081431

14091432
finalize_hashfile(f, midx_hash, CSUM_FSYNC | CSUM_HASH_IN_STREAM);
14101433
free_chunkfile(cf);
14111434

1412-
if (flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP))
1413-
ctx.pack_order = midx_pack_order(&ctx);
1414-
1415-
if (flags & MIDX_WRITE_REV_INDEX)
1435+
if (flags & MIDX_WRITE_REV_INDEX &&
1436+
git_env_bool("GIT_TEST_MIDX_WRITE_REV", 0))
14161437
write_midx_reverse_index(midx_name.buf, midx_hash, &ctx);
14171438
if (flags & MIDX_WRITE_BITMAP) {
14181439
if (write_midx_bitmap(midx_name.buf, midx_hash, &ctx,

midx.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ struct multi_pack_index {
3636
const unsigned char *chunk_oid_lookup;
3737
const unsigned char *chunk_object_offsets;
3838
const unsigned char *chunk_large_offsets;
39+
const unsigned char *chunk_revindex;
3940

4041
const char **pack_names;
4142
struct packed_git **packs;

pack-bitmap.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,7 +358,9 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git,
358358
cleanup:
359359
munmap(bitmap_git->map, bitmap_git->map_size);
360360
bitmap_git->map_size = 0;
361+
bitmap_git->map_pos = 0;
361362
bitmap_git->map = NULL;
363+
bitmap_git->midx = NULL;
362364
return -1;
363365
}
364366

@@ -405,6 +407,8 @@ static int open_pack_bitmap_1(struct bitmap_index *bitmap_git, struct packed_git
405407
munmap(bitmap_git->map, bitmap_git->map_size);
406408
bitmap_git->map = NULL;
407409
bitmap_git->map_size = 0;
410+
bitmap_git->map_pos = 0;
411+
bitmap_git->pack = NULL;
408412
return -1;
409413
}
410414

pack-revindex.c

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,9 +298,29 @@ int load_midx_revindex(struct multi_pack_index *m)
298298
{
299299
struct strbuf revindex_name = STRBUF_INIT;
300300
int ret;
301+
301302
if (m->revindex_data)
302303
return 0;
303304

305+
if (m->chunk_revindex) {
306+
/*
307+
* If the MIDX `m` has a `RIDX` chunk, then use its contents for
308+
* the reverse index instead of trying to load a separate `.rev`
309+
* file.
310+
*
311+
* Note that we do *not* set `m->revindex_map` here, since we do
312+
* not want to accidentally call munmap() in the middle of the
313+
* MIDX.
314+
*/
315+
trace2_data_string("load_midx_revindex", the_repository,
316+
"source", "midx");
317+
m->revindex_data = (const uint32_t *)m->chunk_revindex;
318+
return 0;
319+
}
320+
321+
trace2_data_string("load_midx_revindex", the_repository,
322+
"source", "rev");
323+
304324
get_midx_rev_filename(&revindex_name, m);
305325

306326
ret = load_revindex_from_disk(revindex_name.buf,

t/lib-bitmap.sh

Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
# Helpers for scripts testing bitmap functionality; see t5310 for
22
# example usage.
33

4+
objdir=.git/objects
5+
midx=$objdir/pack/multi-pack-index
6+
47
# Compare a file containing rev-list bitmap traversal output to its non-bitmap
58
# counterpart. You can't just use test_cmp for this, because the two produce
69
# subtly different output:
@@ -264,3 +267,185 @@ have_delta () {
264267
midx_checksum () {
265268
test-tool read-midx --checksum "$1"
266269
}
270+
271+
# midx_pack_source <obj>
272+
midx_pack_source () {
273+
test-tool read-midx --show-objects .git/objects | grep "^$1 " | cut -f2
274+
}
275+
276+
test_rev_exists () {
277+
commit="$1"
278+
kind="$2"
279+
280+
test_expect_success "reverse index exists ($kind)" '
281+
GIT_TRACE2_EVENT=$(pwd)/event.trace \
282+
git rev-list --test-bitmap "$commit" &&
283+
284+
if test "rev" = "$kind"
285+
then
286+
test_path_is_file $midx-$(midx_checksum $objdir).rev
287+
fi &&
288+
grep "\"category\":\"load_midx_revindex\",\"key\":\"source\",\"value\":\"$kind\"" event.trace
289+
'
290+
}
291+
292+
midx_bitmap_core () {
293+
rev_kind="${1:-midx}"
294+
295+
setup_bitmap_history
296+
297+
test_expect_success 'create single-pack midx with bitmaps' '
298+
git repack -ad &&
299+
git multi-pack-index write --bitmap &&
300+
test_path_is_file $midx &&
301+
test_path_is_file $midx-$(midx_checksum $objdir).bitmap
302+
'
303+
304+
test_rev_exists HEAD "$rev_kind"
305+
306+
basic_bitmap_tests
307+
308+
test_expect_success 'create new additional packs' '
309+
for i in $(test_seq 1 16)
310+
do
311+
test_commit "$i" &&
312+
git repack -d || return 1
313+
done &&
314+
315+
git checkout -b other2 HEAD~8 &&
316+
for i in $(test_seq 1 8)
317+
do
318+
test_commit "side-$i" &&
319+
git repack -d || return 1
320+
done &&
321+
git checkout second
322+
'
323+
324+
test_expect_success 'create multi-pack midx with bitmaps' '
325+
git multi-pack-index write --bitmap &&
326+
327+
ls $objdir/pack/pack-*.pack >packs &&
328+
test_line_count = 25 packs &&
329+
330+
test_path_is_file $midx &&
331+
test_path_is_file $midx-$(midx_checksum $objdir).bitmap
332+
'
333+
334+
test_rev_exists HEAD "$rev_kind"
335+
336+
basic_bitmap_tests
337+
338+
test_expect_success '--no-bitmap is respected when bitmaps exist' '
339+
git multi-pack-index write --bitmap &&
340+
341+
test_commit respect--no-bitmap &&
342+
git repack -d &&
343+
344+
test_path_is_file $midx &&
345+
test_path_is_file $midx-$(midx_checksum $objdir).bitmap &&
346+
347+
git multi-pack-index write --no-bitmap &&
348+
349+
test_path_is_file $midx &&
350+
test_path_is_missing $midx-$(midx_checksum $objdir).bitmap &&
351+
test_path_is_missing $midx-$(midx_checksum $objdir).rev
352+
'
353+
354+
test_expect_success 'setup midx with base from later pack' '
355+
# Write a and b so that "a" is a delta on top of base "b", since Git
356+
# prefers to delete contents out of a base rather than add to a shorter
357+
# object.
358+
test_seq 1 128 >a &&
359+
test_seq 1 130 >b &&
360+
361+
git add a b &&
362+
git commit -m "initial commit" &&
363+
364+
a=$(git rev-parse HEAD:a) &&
365+
b=$(git rev-parse HEAD:b) &&
366+
367+
# In the first pack, "a" is stored as a delta to "b".
368+
p1=$(git pack-objects .git/objects/pack/pack <<-EOF
369+
$a
370+
$b
371+
EOF
372+
) &&
373+
374+
# In the second pack, "a" is missing, and "b" is not a delta nor base to
375+
# any other object.
376+
p2=$(git pack-objects .git/objects/pack/pack <<-EOF
377+
$b
378+
$(git rev-parse HEAD)
379+
$(git rev-parse HEAD^{tree})
380+
EOF
381+
) &&
382+
383+
git prune-packed &&
384+
# Use the second pack as the preferred source, so that "b" occurs
385+
# earlier in the MIDX object order, rendering "a" unusable for pack
386+
# reuse.
387+
git multi-pack-index write --bitmap --preferred-pack=pack-$p2.idx &&
388+
389+
have_delta $a $b &&
390+
test $(midx_pack_source $a) != $(midx_pack_source $b)
391+
'
392+
393+
rev_list_tests 'full bitmap with backwards delta'
394+
395+
test_expect_success 'clone with bitmaps enabled' '
396+
git clone --no-local --bare . clone-reverse-delta.git &&
397+
test_when_finished "rm -fr clone-reverse-delta.git" &&
398+
399+
git rev-parse HEAD >expect &&
400+
git --git-dir=clone-reverse-delta.git rev-parse HEAD >actual &&
401+
test_cmp expect actual
402+
'
403+
404+
test_expect_success 'changing the preferred pack does not corrupt bitmaps' '
405+
rm -fr repo &&
406+
git init repo &&
407+
test_when_finished "rm -fr repo" &&
408+
(
409+
cd repo &&
410+
411+
test_commit A &&
412+
test_commit B &&
413+
414+
git rev-list --objects --no-object-names HEAD^ >A.objects &&
415+
git rev-list --objects --no-object-names HEAD^.. >B.objects &&
416+
417+
A=$(git pack-objects $objdir/pack/pack <A.objects) &&
418+
B=$(git pack-objects $objdir/pack/pack <B.objects) &&
419+
420+
cat >indexes <<-EOF &&
421+
pack-$A.idx
422+
pack-$B.idx
423+
EOF
424+
425+
git multi-pack-index write --bitmap --stdin-packs \
426+
--preferred-pack=pack-$A.pack <indexes &&
427+
git rev-list --test-bitmap A &&
428+
429+
git multi-pack-index write --bitmap --stdin-packs \
430+
--preferred-pack=pack-$B.pack <indexes &&
431+
git rev-list --test-bitmap A
432+
)
433+
'
434+
}
435+
436+
midx_bitmap_partial_tests () {
437+
rev_kind="${1:-midx}"
438+
439+
test_expect_success 'setup partial bitmaps' '
440+
test_commit packed &&
441+
git repack &&
442+
test_commit loose &&
443+
git multi-pack-index write --bitmap 2>err &&
444+
test_path_is_file $midx &&
445+
test_path_is_file $midx-$(midx_checksum $objdir).bitmap
446+
'
447+
448+
test_rev_exists HEAD~ "$rev_kind"
449+
450+
basic_bitmap_tests HEAD~
451+
}

t/t5310-pack-bitmaps.sh

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -397,4 +397,32 @@ test_expect_success 'pack.preferBitmapTips' '
397397
)
398398
'
399399

400+
test_expect_success 'complains about multiple pack bitmaps' '
401+
rm -fr repo &&
402+
git init repo &&
403+
test_when_finished "rm -fr repo" &&
404+
(
405+
cd repo &&
406+
407+
test_commit base &&
408+
409+
git repack -adb &&
410+
bitmap="$(ls .git/objects/pack/pack-*.bitmap)" &&
411+
mv "$bitmap" "$bitmap.bak" &&
412+
413+
test_commit other &&
414+
git repack -ab &&
415+
416+
mv "$bitmap.bak" "$bitmap" &&
417+
418+
find .git/objects/pack -type f -name "*.pack" >packs &&
419+
find .git/objects/pack -type f -name "*.bitmap" >bitmaps &&
420+
test_line_count = 2 packs &&
421+
test_line_count = 2 bitmaps &&
422+
423+
git rev-list --use-bitmap-index HEAD 2>err &&
424+
grep "ignoring extra bitmap file" err
425+
)
426+
'
427+
400428
test_done

0 commit comments

Comments
 (0)