Skip to content

Commit ddac965

Browse files
pks-tgitster
authored andcommitted
reftable/writer: fix index corruption when writing multiple indices
Each reftable may contain multiple types of blocks for refs, objects and reflog records, where each of these may have an index that makes it more efficient to find the records. It was observed that the index for log records can become corrupted under certain circumstances, where the first entry of the index points into the object index instead of to the log records. As it turns out, this corruption can occur whenever we write a log index as well as at least one additional index. Writing records and their index is basically a two-step process: 1. We write all blocks for the corresponding record. Each block that gets written is added to a list of blocks to index. 2. Once all blocks were written we finish the section. If at least two blocks have been added to the list of blocks to index then we will now write the index for those blocks and flush it, as well. When we have a very large number of blocks then we may decide to write a multi-level index, which is why we also keep track of the list of the index blocks in the same way as we previously kept track of the blocks to index. Now when we have finished writing all index blocks we clear the index and flush the last block to disk. This is done in the wrong order though because flushing the block to disk will re-add it to the list of blocks to be indexed. The result is that the next section we are about to write will have an entry in the list of blocks to index that points to the last block of the preceding section's index, which will corrupt the log index. Fix this corruption by clearing the index after having written the last block. Signed-off-by: Patrick Steinhardt <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 75d7906 commit ddac965

File tree

2 files changed

+82
-2
lines changed

2 files changed

+82
-2
lines changed

reftable/readwrite_test.c

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -798,6 +798,85 @@ static void test_write_key_order(void)
798798
strbuf_release(&buf);
799799
}
800800

801+
static void test_write_multiple_indices(void)
802+
{
803+
struct reftable_write_options opts = {
804+
.block_size = 100,
805+
};
806+
struct strbuf writer_buf = STRBUF_INIT, buf = STRBUF_INIT;
807+
struct reftable_block_source source = { 0 };
808+
struct reftable_iterator it = { 0 };
809+
const struct reftable_stats *stats;
810+
struct reftable_writer *writer;
811+
struct reftable_reader *reader;
812+
int err, i;
813+
814+
writer = reftable_new_writer(&strbuf_add_void, &writer_buf, &opts);
815+
reftable_writer_set_limits(writer, 1, 1);
816+
for (i = 0; i < 100; i++) {
817+
unsigned char hash[GIT_SHA1_RAWSZ] = {i};
818+
struct reftable_ref_record ref = {
819+
.update_index = 1,
820+
.value_type = REFTABLE_REF_VAL1,
821+
.value.val1 = hash,
822+
};
823+
824+
strbuf_reset(&buf);
825+
strbuf_addf(&buf, "refs/heads/%04d", i);
826+
ref.refname = buf.buf,
827+
828+
err = reftable_writer_add_ref(writer, &ref);
829+
EXPECT_ERR(err);
830+
}
831+
832+
for (i = 0; i < 100; i++) {
833+
unsigned char hash[GIT_SHA1_RAWSZ] = {i};
834+
struct reftable_log_record log = {
835+
.update_index = 1,
836+
.value_type = REFTABLE_LOG_UPDATE,
837+
.value.update = {
838+
.old_hash = hash,
839+
.new_hash = hash,
840+
},
841+
};
842+
843+
strbuf_reset(&buf);
844+
strbuf_addf(&buf, "refs/heads/%04d", i);
845+
log.refname = buf.buf,
846+
847+
err = reftable_writer_add_log(writer, &log);
848+
EXPECT_ERR(err);
849+
}
850+
851+
reftable_writer_close(writer);
852+
853+
/*
854+
* The written data should be sufficiently large to result in indices
855+
* for each of the block types.
856+
*/
857+
stats = reftable_writer_stats(writer);
858+
EXPECT(stats->ref_stats.index_offset > 0);
859+
EXPECT(stats->obj_stats.index_offset > 0);
860+
EXPECT(stats->log_stats.index_offset > 0);
861+
862+
block_source_from_strbuf(&source, &writer_buf);
863+
err = reftable_new_reader(&reader, &source, "filename");
864+
EXPECT_ERR(err);
865+
866+
/*
867+
* Seeking the log uses the log index now. In case there is any
868+
* confusion regarding indices we would notice here.
869+
*/
870+
err = reftable_reader_seek_log(reader, &it, "");
871+
EXPECT_ERR(err);
872+
873+
reftable_iterator_destroy(&it);
874+
reftable_writer_free(writer);
875+
reftable_reader_free(reader);
876+
strbuf_release(&writer_buf);
877+
strbuf_release(&buf);
878+
}
879+
801880
static void test_corrupt_table_empty(void)
802881
{
803882
struct strbuf buf = STRBUF_INIT;
@@ -847,5 +926,6 @@ int readwrite_test_main(int argc, const char *argv[])
847926
RUN_TEST(test_log_overflow);
848927
RUN_TEST(test_write_object_id_length);
849928
RUN_TEST(test_write_object_id_min_length);
929+
RUN_TEST(test_write_multiple_indices);
850930
return 0;
851931
}

reftable/writer.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -432,12 +432,12 @@ static int writer_finish_section(struct reftable_writer *w)
432432
reftable_free(idx);
433433
}
434434

435-
writer_clear_index(w);
436-
437435
err = writer_flush_block(w);
438436
if (err < 0)
439437
return err;
440438

439+
writer_clear_index(w);
440+
441441
bstats = writer_reftable_block_stats(w, typ);
442442
bstats->index_blocks = w->stats.idx_stats.blocks - before_blocks;
443443
bstats->index_offset = index_start;

0 commit comments

Comments
 (0)