Skip to content

Commit 570b8b8

Browse files
peffgitster
authored andcommitted
chunk-format: note that pair_chunk() is unsafe
The pair_chunk() function is provided as an easy helper for parsing chunks that just want a pointer to a set of bytes. But every caller has a hidden bug: because we return only the pointer without the matching chunk size, the callers have no clue how many bytes they are allowed to look at. And as a result, they may read off the end of the mmap'd data when the on-disk file does not match their expectations. Since chunk files are typically used for local-repository data like commit-graph files and midx's, the security implications here are pretty mild. The worst that can happen is that you hand somebody a corrupted repository tarball, and running Git on it does an out-of-bounds read and crashes. So it's worth being more defensive, but we don't need to drop everything and fix every caller immediately. I noticed the problem because the pair_chunk_fn() callback does not look at its chunk_size argument, and wanted to annotate it to silence -Wunused-parameter. We could do that now, but we'd lose the hint that this code should be audited and fixed. So instead, let's set ourselves up for going down that path: 1. Provide a pair_chunk() function that does return the size, which prepares us for fixing these cases. 2. Rename the existing function to pair_chunk_unsafe(). That gives us an easy way to grep for cases which still need to be fixed, and the name should cause anybody adding new calls to think twice before using it. There are no callers of the "safe" version yet, but we'll add some in subsequent patches. Signed-off-by: Jeff King <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 3a06386 commit 570b8b8

File tree

4 files changed

+49
-18
lines changed

4 files changed

+49
-18
lines changed

chunk-format.c

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -154,20 +154,36 @@ int read_table_of_contents(struct chunkfile *cf,
154154
return 0;
155155
}
156156

157+
struct pair_chunk_data {
158+
const unsigned char **p;
159+
size_t *size;
160+
};
161+
157162
static int pair_chunk_fn(const unsigned char *chunk_start,
158163
size_t chunk_size,
159164
void *data)
160165
{
161-
const unsigned char **p = data;
162-
*p = chunk_start;
166+
struct pair_chunk_data *pcd = data;
167+
*pcd->p = chunk_start;
168+
*pcd->size = chunk_size;
163169
return 0;
164170
}
165171

166172
int pair_chunk(struct chunkfile *cf,
167173
uint32_t chunk_id,
168-
const unsigned char **p)
174+
const unsigned char **p,
175+
size_t *size)
176+
{
177+
struct pair_chunk_data pcd = { .p = p, .size = size };
178+
return read_chunk(cf, chunk_id, pair_chunk_fn, &pcd);
179+
}
180+
181+
int pair_chunk_unsafe(struct chunkfile *cf,
182+
uint32_t chunk_id,
183+
const unsigned char **p)
169184
{
170-
return read_chunk(cf, chunk_id, pair_chunk_fn, p);
185+
size_t dummy;
186+
return pair_chunk(cf, chunk_id, p, &dummy);
171187
}
172188

173189
int read_chunk(struct chunkfile *cf,

chunk-format.h

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,28 @@ int read_table_of_contents(struct chunkfile *cf,
4343
/*
4444
* Find 'chunk_id' in the given chunkfile and assign the
4545
* given pointer to the position in the mmap'd file where
46-
* that chunk begins.
46+
* that chunk begins. Likewise the "size" parameter is filled
47+
* with the size of the chunk.
4748
*
4849
* Returns CHUNK_NOT_FOUND if the chunk does not exist.
4950
*/
5051
int pair_chunk(struct chunkfile *cf,
5152
uint32_t chunk_id,
52-
const unsigned char **p);
53+
const unsigned char **p,
54+
size_t *size);
55+
56+
/*
57+
* Unsafe version of pair_chunk; it does not return the size,
58+
* meaning that the caller cannot possibly be careful about
59+
* reading out of bounds from the mapped memory.
60+
*
61+
* No new callers should use this function, and old callers should
62+
* be audited and migrated over to using the regular pair_chunk()
63+
* function.
64+
*/
65+
int pair_chunk_unsafe(struct chunkfile *cf,
66+
uint32_t chunk_id,
67+
const unsigned char **p);
5368

5469
typedef int (*chunk_read_fn)(const unsigned char *chunk_start,
5570
size_t chunk_size, void *data);

commit-graph.c

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -394,25 +394,25 @@ struct commit_graph *parse_commit_graph(struct repo_settings *s,
394394
GRAPH_HEADER_SIZE, graph->num_chunks))
395395
goto free_and_return;
396396

397-
pair_chunk(cf, GRAPH_CHUNKID_OIDFANOUT,
397+
pair_chunk_unsafe(cf, GRAPH_CHUNKID_OIDFANOUT,
398398
(const unsigned char **)&graph->chunk_oid_fanout);
399399
read_chunk(cf, GRAPH_CHUNKID_OIDLOOKUP, graph_read_oid_lookup, graph);
400-
pair_chunk(cf, GRAPH_CHUNKID_DATA, &graph->chunk_commit_data);
401-
pair_chunk(cf, GRAPH_CHUNKID_EXTRAEDGES, &graph->chunk_extra_edges);
402-
pair_chunk(cf, GRAPH_CHUNKID_BASE, &graph->chunk_base_graphs);
400+
pair_chunk_unsafe(cf, GRAPH_CHUNKID_DATA, &graph->chunk_commit_data);
401+
pair_chunk_unsafe(cf, GRAPH_CHUNKID_EXTRAEDGES, &graph->chunk_extra_edges);
402+
pair_chunk_unsafe(cf, GRAPH_CHUNKID_BASE, &graph->chunk_base_graphs);
403403

404404
if (s->commit_graph_generation_version >= 2) {
405-
pair_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA,
405+
pair_chunk_unsafe(cf, GRAPH_CHUNKID_GENERATION_DATA,
406406
&graph->chunk_generation_data);
407-
pair_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW,
407+
pair_chunk_unsafe(cf, GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW,
408408
&graph->chunk_generation_data_overflow);
409409

410410
if (graph->chunk_generation_data)
411411
graph->read_generation_data = 1;
412412
}
413413

414414
if (s->commit_graph_read_changed_paths) {
415-
pair_chunk(cf, GRAPH_CHUNKID_BLOOMINDEXES,
415+
pair_chunk_unsafe(cf, GRAPH_CHUNKID_BLOOMINDEXES,
416416
&graph->chunk_bloom_indexes);
417417
read_chunk(cf, GRAPH_CHUNKID_BLOOMDATA,
418418
graph_read_bloom_data, graph);

midx.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -143,19 +143,19 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local
143143
MIDX_HEADER_SIZE, m->num_chunks))
144144
goto cleanup_fail;
145145

146-
if (pair_chunk(cf, MIDX_CHUNKID_PACKNAMES, &m->chunk_pack_names) == CHUNK_NOT_FOUND)
146+
if (pair_chunk_unsafe(cf, MIDX_CHUNKID_PACKNAMES, &m->chunk_pack_names) == CHUNK_NOT_FOUND)
147147
die(_("multi-pack-index missing required pack-name chunk"));
148148
if (read_chunk(cf, MIDX_CHUNKID_OIDFANOUT, midx_read_oid_fanout, m) == CHUNK_NOT_FOUND)
149149
die(_("multi-pack-index missing required OID fanout chunk"));
150-
if (pair_chunk(cf, MIDX_CHUNKID_OIDLOOKUP, &m->chunk_oid_lookup) == CHUNK_NOT_FOUND)
150+
if (pair_chunk_unsafe(cf, MIDX_CHUNKID_OIDLOOKUP, &m->chunk_oid_lookup) == CHUNK_NOT_FOUND)
151151
die(_("multi-pack-index missing required OID lookup chunk"));
152-
if (pair_chunk(cf, MIDX_CHUNKID_OBJECTOFFSETS, &m->chunk_object_offsets) == CHUNK_NOT_FOUND)
152+
if (pair_chunk_unsafe(cf, MIDX_CHUNKID_OBJECTOFFSETS, &m->chunk_object_offsets) == CHUNK_NOT_FOUND)
153153
die(_("multi-pack-index missing required object offsets chunk"));
154154

155-
pair_chunk(cf, MIDX_CHUNKID_LARGEOFFSETS, &m->chunk_large_offsets);
155+
pair_chunk_unsafe(cf, MIDX_CHUNKID_LARGEOFFSETS, &m->chunk_large_offsets);
156156

157157
if (git_env_bool("GIT_TEST_MIDX_READ_RIDX", 1))
158-
pair_chunk(cf, MIDX_CHUNKID_REVINDEX, &m->chunk_revindex);
158+
pair_chunk_unsafe(cf, MIDX_CHUNKID_REVINDEX, &m->chunk_revindex);
159159

160160
m->num_objects = ntohl(m->chunk_oid_fanout[255]);
161161

0 commit comments

Comments
 (0)