Skip to content

Commit 7f514b7

Browse files
ttaylorrgitster
authored andcommitted
midx: read RIDX chunk when present
When a MIDX contains the new `RIDX` chunk, ensure that the reverse index is read from it instead of the on-disk .rev file. Since we need to encode the object order in the MIDX itself for correctness reasons, there is no point in storing the same data again outside of the MIDX. So, this patch stops writing separate .rev files, and reads it out of the MIDX itself. This is possible to do with relatively little new code, since the format of the RIDX chunk is identical to the data in the .rev file. In other words, we can implement this by pointing the `revindex_data` field at the reverse index chunk of the MIDX instead of the .rev file without any other changes. Note that we have two knobs that are adjusted for the new tests: GIT_TEST_MIDX_WRITE_REV and GIT_TEST_MIDX_READ_RIDX. The former controls whether the MIDX .rev is written at all, and the latter controls whether we read the MIDX's RIDX chunk. Both are necessary to ensure that the test added at the beginning of this series continues to work. This is because we always need to write the RIDX chunk in the MIDX in order to change its checksum, but we want to make sure reading the existing .rev file still works (since the RIDX chunk takes precedence by default). Arguably this isn't a very interesting mode to test, because the precedence rules mean that we'll always read the RIDX chunk over the .rev file. But it makes it impossible for a user to induce corruption in their repository by adjusting the test knobs (since if we had an either/or knob they could stop writing the RIDX chunk, allowing them to tweak the MIDX's object order without changing its checksum). Signed-off-by: Taylor Blau <[email protected]> Reviewed-by: Derrick Stolee <[email protected]> Reviewed-by: Jonathan Tan <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent a80f0f9 commit 7f514b7

File tree

7 files changed

+54
-7
lines changed

7 files changed

+54
-7
lines changed

midx.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,9 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local
162162

163163
pair_chunk(cf, MIDX_CHUNKID_LARGEOFFSETS, &m->chunk_large_offsets);
164164

165+
if (git_env_bool("GIT_TEST_MIDX_READ_RIDX", 1))
166+
pair_chunk(cf, MIDX_CHUNKID_REVINDEX, &m->chunk_revindex);
167+
165168
m->num_objects = ntohl(m->chunk_oid_fanout[255]);
166169

167170
CALLOC_ARRAY(m->pack_names, m->num_packs);
@@ -1429,7 +1432,8 @@ static int write_midx_internal(const char *object_dir,
14291432
finalize_hashfile(f, midx_hash, CSUM_FSYNC | CSUM_HASH_IN_STREAM);
14301433
free_chunkfile(cf);
14311434

1432-
if (flags & MIDX_WRITE_REV_INDEX)
1435+
if (flags & MIDX_WRITE_REV_INDEX &&
1436+
git_env_bool("GIT_TEST_MIDX_WRITE_REV", 0))
14331437
write_midx_reverse_index(midx_name.buf, midx_hash, &ctx);
14341438
if (flags & MIDX_WRITE_BITMAP) {
14351439
if (write_midx_bitmap(midx_name.buf, midx_hash, &ctx,

midx.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ struct multi_pack_index {
3636
const unsigned char *chunk_oid_lookup;
3737
const unsigned char *chunk_object_offsets;
3838
const unsigned char *chunk_large_offsets;
39+
const unsigned char *chunk_revindex;
3940

4041
const char **pack_names;
4142
struct packed_git **packs;

pack-revindex.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,9 +298,26 @@ int load_midx_revindex(struct multi_pack_index *m)
298298
{
299299
struct strbuf revindex_name = STRBUF_INIT;
300300
int ret;
301+
301302
if (m->revindex_data)
302303
return 0;
303304

305+
if (m->chunk_revindex) {
306+
/*
307+
* If the MIDX `m` has a `RIDX` chunk, then use its contents for
308+
* the reverse index instead of trying to load a separate `.rev`
309+
* file.
310+
*
311+
* Note that we do *not* set `m->revindex_map` here, since we do
312+
* not want to accidentally call munmap() in the middle of the
313+
* MIDX.
314+
*/
315+
trace2_data_string("load_midx_revindex", the_repository,
316+
"source", "midx");
317+
m->revindex_data = (const uint32_t *)m->chunk_revindex;
318+
return 0;
319+
}
320+
304321
trace2_data_string("load_midx_revindex", the_repository,
305322
"source", "rev");
306323

t/lib-bitmap.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -290,7 +290,7 @@ test_rev_exists () {
290290
}
291291

292292
midx_bitmap_core () {
293-
rev_kind="${1:-rev}"
293+
rev_kind="${1:-midx}"
294294

295295
setup_bitmap_history
296296

@@ -434,7 +434,7 @@ midx_bitmap_core () {
434434
}
435435

436436
midx_bitmap_partial_tests () {
437-
rev_kind="${1:-rev}"
437+
rev_kind="${1:-midx}"
438438

439439
test_expect_success 'setup partial bitmaps' '
440440
test_commit packed &&

t/t5326-multi-pack-bitmaps.sh

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,12 @@ test_description='exercise basic multi-pack bitmap functionality'
99
GIT_TEST_MULTI_PACK_INDEX=0
1010
GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0
1111

12+
# This test exercise multi-pack bitmap functionality where the object order is
13+
# stored and read from a special chunk within the MIDX, so use the default
14+
# behavior here.
15+
sane_unset GIT_TEST_MIDX_WRITE_REV
16+
sane_unset GIT_TEST_MIDX_READ_RIDX
17+
1218
midx_bitmap_core
1319

1420
bitmap_reuse_tests() {

t/t5327-multi-pack-bitmaps-rev.sh

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
#!/bin/sh
2+
3+
test_description='exercise basic multi-pack bitmap functionality (.rev files)'
4+
5+
. ./test-lib.sh
6+
. "${TEST_DIRECTORY}/lib-bitmap.sh"
7+
8+
# We'll be writing our own midx and bitmaps, so avoid getting confused by the
9+
# automatic ones.
10+
GIT_TEST_MULTI_PACK_INDEX=0
11+
GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0
12+
13+
# Unlike t5326, this test exercise multi-pack bitmap functionality where the
14+
# object order is stored in a separate .rev file.
15+
GIT_TEST_MIDX_WRITE_REV=1
16+
GIT_TEST_MIDX_READ_RIDX=0
17+
export GIT_TEST_MIDX_WRITE_REV
18+
export GIT_TEST_MIDX_READ_RIDX
19+
20+
midx_bitmap_core rev
21+
midx_bitmap_partial_tests rev
22+
23+
test_done

t/t7700-repack.sh

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -311,16 +311,13 @@ test_expect_success 'cleans up MIDX when appropriate' '
311311
checksum=$(midx_checksum $objdir) &&
312312
test_path_is_file $midx &&
313313
test_path_is_file $midx-$checksum.bitmap &&
314-
test_path_is_file $midx-$checksum.rev &&
315314
316315
test_commit repack-3 &&
317316
GIT_TEST_MULTI_PACK_INDEX=0 git repack -Adb --write-midx &&
318317
319318
test_path_is_file $midx &&
320319
test_path_is_missing $midx-$checksum.bitmap &&
321-
test_path_is_missing $midx-$checksum.rev &&
322320
test_path_is_file $midx-$(midx_checksum $objdir).bitmap &&
323-
test_path_is_file $midx-$(midx_checksum $objdir).rev &&
324321
325322
test_commit repack-4 &&
326323
GIT_TEST_MULTI_PACK_INDEX=0 git repack -Adb &&
@@ -353,7 +350,6 @@ test_expect_success '--write-midx with preferred bitmap tips' '
353350
test_line_count = 1 before &&
354351
355352
rm -fr $midx-$(midx_checksum $objdir).bitmap &&
356-
rm -fr $midx-$(midx_checksum $objdir).rev &&
357353
rm -fr $midx &&
358354
359355
# instead of constructing the snapshot ourselves (c.f., the test

0 commit comments

Comments
 (0)