Skip to content

Commit f32af12

Browse files
committed
Merge branch 'jk/chunk-bounds'
The codepaths that read "chunk" formatted files have been corrected to pay attention to the chunk size and notice broken files. * jk/chunk-bounds: (21 commits) t5319: make corrupted large-offset test more robust chunk-format: drop pair_chunk_unsafe() commit-graph: detect out-of-order BIDX offsets commit-graph: check bounds when accessing BIDX chunk commit-graph: check bounds when accessing BDAT chunk commit-graph: bounds-check generation overflow chunk commit-graph: check size of generations chunk commit-graph: bounds-check base graphs chunk commit-graph: detect out-of-bounds extra-edges pointers commit-graph: check size of commit data chunk midx: check size of revindex chunk midx: bounds-check large offset chunk midx: check size of object offset chunk midx: enforce chunk alignment on reading midx: check size of pack names chunk commit-graph: check consistency of fanout table midx: check size of oid lookup chunk commit-graph: check size of oid fanout chunk midx: stop ignoring malformed oid fanout chunk t: add library for munging chunk-format files ...
2 parents ceadf0f + 7538f9d commit f32af12

15 files changed

+570
-47
lines changed

bloom.c

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,26 @@ static inline unsigned char get_bitmask(uint32_t pos)
2929
return ((unsigned char)1) << (pos & (BITS_PER_WORD - 1));
3030
}
3131

32+
static int check_bloom_offset(struct commit_graph *g, uint32_t pos,
33+
uint32_t offset)
34+
{
35+
/*
36+
* Note that we allow offsets equal to the data size, which would set
37+
* our pointers at one past the end of the chunk memory. This is
38+
* necessary because the on-disk index points to the end of the
39+
* entries (so we can compute size by comparing adjacent ones). And
40+
* naturally the final entry's end is one-past-the-end of the chunk.
41+
*/
42+
if (offset <= g->chunk_bloom_data_size - BLOOMDATA_CHUNK_HEADER_SIZE)
43+
return 0;
44+
45+
warning("ignoring out-of-range offset (%"PRIuMAX") for changed-path"
46+
" filter at pos %"PRIuMAX" of %s (chunk size: %"PRIuMAX")",
47+
(uintmax_t)offset, (uintmax_t)pos,
48+
g->filename, (uintmax_t)g->chunk_bloom_data_size);
49+
return -1;
50+
}
51+
3252
static int load_bloom_filter_from_graph(struct commit_graph *g,
3353
struct bloom_filter *filter,
3454
uint32_t graph_pos)
@@ -51,6 +71,20 @@ static int load_bloom_filter_from_graph(struct commit_graph *g,
5171
else
5272
start_index = 0;
5373

74+
if (check_bloom_offset(g, lex_pos, end_index) < 0 ||
75+
check_bloom_offset(g, lex_pos - 1, start_index) < 0)
76+
return 0;
77+
78+
if (end_index < start_index) {
79+
warning("ignoring decreasing changed-path index offsets"
80+
" (%"PRIuMAX" > %"PRIuMAX") for positions"
81+
" %"PRIuMAX" and %"PRIuMAX" of %s",
82+
(uintmax_t)start_index, (uintmax_t)end_index,
83+
(uintmax_t)(lex_pos-1), (uintmax_t)lex_pos,
84+
g->filename);
85+
return 0;
86+
}
87+
5488
filter->len = end_index - start_index;
5589
filter->data = (unsigned char *)(g->chunk_bloom_data +
5690
sizeof(unsigned char) * start_index +

chunk-format.c

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,8 @@ int read_table_of_contents(struct chunkfile *cf,
102102
const unsigned char *mfile,
103103
size_t mfile_size,
104104
uint64_t toc_offset,
105-
int toc_length)
105+
int toc_length,
106+
unsigned expected_alignment)
106107
{
107108
int i;
108109
uint32_t chunk_id;
@@ -120,6 +121,11 @@ int read_table_of_contents(struct chunkfile *cf,
120121
error(_("terminating chunk id appears earlier than expected"));
121122
return 1;
122123
}
124+
if (chunk_offset % expected_alignment != 0) {
125+
error(_("chunk id %"PRIx32" not %d-byte aligned"),
126+
chunk_id, expected_alignment);
127+
return 1;
128+
}
123129

124130
table_of_contents += CHUNK_TOC_ENTRY_SIZE;
125131
next_chunk_offset = get_be64(table_of_contents + 4);
@@ -154,20 +160,28 @@ int read_table_of_contents(struct chunkfile *cf,
154160
return 0;
155161
}
156162

163+
struct pair_chunk_data {
164+
const unsigned char **p;
165+
size_t *size;
166+
};
167+
157168
static int pair_chunk_fn(const unsigned char *chunk_start,
158169
size_t chunk_size,
159170
void *data)
160171
{
161-
const unsigned char **p = data;
162-
*p = chunk_start;
172+
struct pair_chunk_data *pcd = data;
173+
*pcd->p = chunk_start;
174+
*pcd->size = chunk_size;
163175
return 0;
164176
}
165177

166178
int pair_chunk(struct chunkfile *cf,
167179
uint32_t chunk_id,
168-
const unsigned char **p)
180+
const unsigned char **p,
181+
size_t *size)
169182
{
170-
return read_chunk(cf, chunk_id, pair_chunk_fn, p);
183+
struct pair_chunk_data pcd = { .p = p, .size = size };
184+
return read_chunk(cf, chunk_id, pair_chunk_fn, &pcd);
171185
}
172186

173187
int read_chunk(struct chunkfile *cf,

chunk-format.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,20 +36,23 @@ int read_table_of_contents(struct chunkfile *cf,
3636
const unsigned char *mfile,
3737
size_t mfile_size,
3838
uint64_t toc_offset,
39-
int toc_length);
39+
int toc_length,
40+
unsigned expected_alignment);
4041

4142
#define CHUNK_NOT_FOUND (-2)
4243

4344
/*
4445
* Find 'chunk_id' in the given chunkfile and assign the
4546
* given pointer to the position in the mmap'd file where
46-
* that chunk begins.
47+
* that chunk begins. Likewise the "size" parameter is filled
48+
* with the size of the chunk.
4749
*
4850
* Returns CHUNK_NOT_FOUND if the chunk does not exist.
4951
*/
5052
int pair_chunk(struct chunkfile *cf,
5153
uint32_t chunk_id,
52-
const unsigned char **p);
54+
const unsigned char **p,
55+
size_t *size);
5356

5457
typedef int (*chunk_read_fn)(const unsigned char *chunk_start,
5558
size_t chunk_size, void *data);

commit-graph.c

Lines changed: 102 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,8 @@ struct commit_graph *load_commit_graph_one_fd_st(struct repository *r,
277277

278278
static int verify_commit_graph_lite(struct commit_graph *g)
279279
{
280+
int i;
281+
280282
/*
281283
* Basic validation shared between parse_commit_graph()
282284
* which'll be called every time the graph is used, and the
@@ -302,6 +304,30 @@ static int verify_commit_graph_lite(struct commit_graph *g)
302304
return 1;
303305
}
304306

307+
for (i = 0; i < 255; i++) {
308+
uint32_t oid_fanout1 = ntohl(g->chunk_oid_fanout[i]);
309+
uint32_t oid_fanout2 = ntohl(g->chunk_oid_fanout[i + 1]);
310+
311+
if (oid_fanout1 > oid_fanout2) {
312+
error("commit-graph fanout values out of order");
313+
return 1;
314+
}
315+
}
316+
if (ntohl(g->chunk_oid_fanout[255]) != g->num_commits) {
317+
error("commit-graph oid table and fanout disagree on size");
318+
return 1;
319+
}
320+
321+
return 0;
322+
}
323+
324+
static int graph_read_oid_fanout(const unsigned char *chunk_start,
325+
size_t chunk_size, void *data)
326+
{
327+
struct commit_graph *g = data;
328+
if (chunk_size != 256 * sizeof(uint32_t))
329+
return error("commit-graph oid fanout chunk is wrong size");
330+
g->chunk_oid_fanout = (const uint32_t *)chunk_start;
305331
return 0;
306332
}
307333

@@ -314,12 +340,54 @@ static int graph_read_oid_lookup(const unsigned char *chunk_start,
314340
return 0;
315341
}
316342

343+
static int graph_read_commit_data(const unsigned char *chunk_start,
344+
size_t chunk_size, void *data)
345+
{
346+
struct commit_graph *g = data;
347+
if (chunk_size != g->num_commits * GRAPH_DATA_WIDTH)
348+
return error("commit-graph commit data chunk is wrong size");
349+
g->chunk_commit_data = chunk_start;
350+
return 0;
351+
}
352+
353+
static int graph_read_generation_data(const unsigned char *chunk_start,
354+
size_t chunk_size, void *data)
355+
{
356+
struct commit_graph *g = data;
357+
if (chunk_size != g->num_commits * sizeof(uint32_t))
358+
return error("commit-graph generations chunk is wrong size");
359+
g->chunk_generation_data = chunk_start;
360+
return 0;
361+
}
362+
363+
static int graph_read_bloom_index(const unsigned char *chunk_start,
364+
size_t chunk_size, void *data)
365+
{
366+
struct commit_graph *g = data;
367+
if (chunk_size != g->num_commits * 4) {
368+
warning("commit-graph changed-path index chunk is too small");
369+
return -1;
370+
}
371+
g->chunk_bloom_indexes = chunk_start;
372+
return 0;
373+
}
374+
317375
static int graph_read_bloom_data(const unsigned char *chunk_start,
318376
size_t chunk_size, void *data)
319377
{
320378
struct commit_graph *g = data;
321379
uint32_t hash_version;
380+
381+
if (chunk_size < BLOOMDATA_CHUNK_HEADER_SIZE) {
382+
warning("ignoring too-small changed-path chunk"
383+
" (%"PRIuMAX" < %"PRIuMAX") in commit-graph file",
384+
(uintmax_t)chunk_size,
385+
(uintmax_t)BLOOMDATA_CHUNK_HEADER_SIZE);
386+
return -1;
387+
}
388+
322389
g->chunk_bloom_data = chunk_start;
390+
g->chunk_bloom_data_size = chunk_size;
323391
hash_version = get_be32(chunk_start);
324392

325393
if (hash_version != 1)
@@ -391,29 +459,31 @@ struct commit_graph *parse_commit_graph(struct repo_settings *s,
391459
cf = init_chunkfile(NULL);
392460

393461
if (read_table_of_contents(cf, graph->data, graph_size,
394-
GRAPH_HEADER_SIZE, graph->num_chunks))
462+
GRAPH_HEADER_SIZE, graph->num_chunks, 1))
395463
goto free_and_return;
396464

397-
pair_chunk(cf, GRAPH_CHUNKID_OIDFANOUT,
398-
(const unsigned char **)&graph->chunk_oid_fanout);
465+
read_chunk(cf, GRAPH_CHUNKID_OIDFANOUT, graph_read_oid_fanout, graph);
399466
read_chunk(cf, GRAPH_CHUNKID_OIDLOOKUP, graph_read_oid_lookup, graph);
400-
pair_chunk(cf, GRAPH_CHUNKID_DATA, &graph->chunk_commit_data);
401-
pair_chunk(cf, GRAPH_CHUNKID_EXTRAEDGES, &graph->chunk_extra_edges);
402-
pair_chunk(cf, GRAPH_CHUNKID_BASE, &graph->chunk_base_graphs);
467+
read_chunk(cf, GRAPH_CHUNKID_DATA, graph_read_commit_data, graph);
468+
pair_chunk(cf, GRAPH_CHUNKID_EXTRAEDGES, &graph->chunk_extra_edges,
469+
&graph->chunk_extra_edges_size);
470+
pair_chunk(cf, GRAPH_CHUNKID_BASE, &graph->chunk_base_graphs,
471+
&graph->chunk_base_graphs_size);
403472

404473
if (s->commit_graph_generation_version >= 2) {
405-
pair_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA,
406-
&graph->chunk_generation_data);
474+
read_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA,
475+
graph_read_generation_data, graph);
407476
pair_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW,
408-
&graph->chunk_generation_data_overflow);
477+
&graph->chunk_generation_data_overflow,
478+
&graph->chunk_generation_data_overflow_size);
409479

410480
if (graph->chunk_generation_data)
411481
graph->read_generation_data = 1;
412482
}
413483

414484
if (s->commit_graph_read_changed_paths) {
415-
pair_chunk(cf, GRAPH_CHUNKID_BLOOMINDEXES,
416-
&graph->chunk_bloom_indexes);
485+
read_chunk(cf, GRAPH_CHUNKID_BLOOMINDEXES,
486+
graph_read_bloom_index, graph);
417487
read_chunk(cf, GRAPH_CHUNKID_BLOOMDATA,
418488
graph_read_bloom_data, graph);
419489
}
@@ -510,6 +580,11 @@ static int add_graph_to_chain(struct commit_graph *g,
510580
return 0;
511581
}
512582

583+
if (g->chunk_base_graphs_size / g->hash_len < n) {
584+
warning(_("commit-graph base graphs chunk is too small"));
585+
return 0;
586+
}
587+
513588
while (n) {
514589
n--;
515590

@@ -837,7 +912,10 @@ static void fill_commit_graph_info(struct commit *item, struct commit_graph *g,
837912
die(_("commit-graph requires overflow generation data but has none"));
838913

839914
offset_pos = offset ^ CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW;
840-
graph_data->generation = item->date + get_be64(g->chunk_generation_data_overflow + st_mult(8, offset_pos));
915+
if (g->chunk_generation_data_overflow_size / sizeof(uint64_t) <= offset_pos)
916+
die(_("commit-graph overflow generation data is too small"));
917+
graph_data->generation = item->date +
918+
get_be64(g->chunk_generation_data_overflow + sizeof(uint64_t) * offset_pos);
841919
} else
842920
graph_data->generation = item->date + offset;
843921
} else
@@ -857,7 +935,7 @@ static int fill_commit_in_graph(struct repository *r,
857935
struct commit_graph *g, uint32_t pos)
858936
{
859937
uint32_t edge_value;
860-
uint32_t *parent_data_ptr;
938+
uint32_t parent_data_pos;
861939
struct commit_list **pptr;
862940
const unsigned char *commit_data;
863941
uint32_t lex_index;
@@ -889,14 +967,21 @@ static int fill_commit_in_graph(struct repository *r,
889967
return 1;
890968
}
891969

892-
parent_data_ptr = (uint32_t*)(g->chunk_extra_edges +
893-
st_mult(4, edge_value & GRAPH_EDGE_LAST_MASK));
970+
parent_data_pos = edge_value & GRAPH_EDGE_LAST_MASK;
894971
do {
895-
edge_value = get_be32(parent_data_ptr);
972+
if (g->chunk_extra_edges_size / sizeof(uint32_t) <= parent_data_pos) {
973+
error("commit-graph extra-edges pointer out of bounds");
974+
free_commit_list(item->parents);
975+
item->parents = NULL;
976+
item->object.parsed = 0;
977+
return 0;
978+
}
979+
edge_value = get_be32(g->chunk_extra_edges +
980+
sizeof(uint32_t) * parent_data_pos);
896981
pptr = insert_parent_or_die(r, g,
897982
edge_value & GRAPH_EDGE_LAST_MASK,
898983
pptr);
899-
parent_data_ptr++;
984+
parent_data_pos++;
900985
} while (!(edge_value & GRAPH_LAST_EDGE));
901986

902987
return 1;

commit-graph.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,10 +94,14 @@ struct commit_graph {
9494
const unsigned char *chunk_commit_data;
9595
const unsigned char *chunk_generation_data;
9696
const unsigned char *chunk_generation_data_overflow;
97+
size_t chunk_generation_data_overflow_size;
9798
const unsigned char *chunk_extra_edges;
99+
size_t chunk_extra_edges_size;
98100
const unsigned char *chunk_base_graphs;
101+
size_t chunk_base_graphs_size;
99102
const unsigned char *chunk_bloom_indexes;
100103
const unsigned char *chunk_bloom_data;
104+
size_t chunk_bloom_data_size;
101105

102106
struct topo_level_slab *topo_levels;
103107
struct bloom_filter_settings *bloom_filter_settings;

0 commit comments

Comments
 (0)