Skip to content

Commit 5f0879f

Browse files
derrickstolee authored and gitster committed
chunk-format: create read chunk API
Add the capability to read the table of contents, then pair the chunks with necessary logic using chunk_read_fn pointers. Callers will be added in future changes, but the typical outline will be:

1. initialize a 'struct chunkfile' with init_chunkfile(NULL).
2. call read_table_of_contents().
3. for each chunk to parse,
   a. call pair_chunk() to assign a pointer with the chunk position, or
   b. call read_chunk() to run a callback on the chunk start and size.
4. call free_chunkfile() to clear the 'struct chunkfile' data.

We are re-using the anonymous 'struct chunkfile' data, as it is internal to the chunk-format API. This gives it essentially two modes: write and read. If the same struct instance were used for both reads and writes, then there would be failures.

Helped-by: Junio C Hamano <[email protected]>
Signed-off-by: Derrick Stolee <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent 63a8f0e commit 5f0879f

File tree

2 files changed

+127
-0
lines changed

2 files changed

+127
-0
lines changed

chunk-format.c

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ struct chunk_info {
1111
uint32_t id;
1212
uint64_t size;
1313
chunk_write_fn write_fn;
14+
15+
const void *start;
1416
};
1517

1618
struct chunkfile {
@@ -88,3 +90,81 @@ int write_chunkfile(struct chunkfile *cf, void *data)
8890

8991
return 0;
9092
}
93+
94+
int read_table_of_contents(struct chunkfile *cf,
95+
const unsigned char *mfile,
96+
size_t mfile_size,
97+
uint64_t toc_offset,
98+
int toc_length)
99+
{
100+
uint32_t chunk_id;
101+
const unsigned char *table_of_contents = mfile + toc_offset;
102+
103+
ALLOC_GROW(cf->chunks, toc_length, cf->chunks_alloc);
104+
105+
while (toc_length--) {
106+
uint64_t chunk_offset, next_chunk_offset;
107+
108+
chunk_id = get_be32(table_of_contents);
109+
chunk_offset = get_be64(table_of_contents + 4);
110+
111+
if (!chunk_id) {
112+
error(_("terminating chunk id appears earlier than expected"));
113+
return 1;
114+
}
115+
116+
table_of_contents += CHUNK_TOC_ENTRY_SIZE;
117+
next_chunk_offset = get_be64(table_of_contents + 4);
118+
119+
if (next_chunk_offset < chunk_offset ||
120+
next_chunk_offset > mfile_size - the_hash_algo->rawsz) {
121+
error(_("improper chunk offset(s) %"PRIx64" and %"PRIx64""),
122+
chunk_offset, next_chunk_offset);
123+
return -1;
124+
}
125+
126+
cf->chunks[cf->chunks_nr].id = chunk_id;
127+
cf->chunks[cf->chunks_nr].start = mfile + chunk_offset;
128+
cf->chunks[cf->chunks_nr].size = next_chunk_offset - chunk_offset;
129+
cf->chunks_nr++;
130+
}
131+
132+
chunk_id = get_be32(table_of_contents);
133+
if (chunk_id) {
134+
error(_("final chunk has non-zero id %"PRIx32""), chunk_id);
135+
return -1;
136+
}
137+
138+
return 0;
139+
}
140+
141+
/*
 * Callback handed to read_chunk() by pair_chunk(). 'data' is the
 * address of a 'const unsigned char *' which is set to the start of
 * the chunk. The chunk size is not used. Always returns 0.
 */
static int pair_chunk_fn(const unsigned char *chunk_start,
			 size_t chunk_size,
			 void *data)
{
	(void)chunk_size;

	*(const unsigned char **)data = chunk_start;

	return 0;
}
149+
150+
int pair_chunk(struct chunkfile *cf,
151+
uint32_t chunk_id,
152+
const unsigned char **p)
153+
{
154+
return read_chunk(cf, chunk_id, pair_chunk_fn, p);
155+
}
156+
157+
int read_chunk(struct chunkfile *cf,
158+
uint32_t chunk_id,
159+
chunk_read_fn fn,
160+
void *data)
161+
{
162+
int i;
163+
164+
for (i = 0; i < cf->chunks_nr; i++) {
165+
if (cf->chunks[i].id == chunk_id)
166+
return fn(cf->chunks[i].start, cf->chunks[i].size, data);
167+
}
168+
169+
return CHUNK_NOT_FOUND;
170+
}

chunk-format.h

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,20 @@ struct chunkfile;
88

99
#define CHUNK_TOC_ENTRY_SIZE (sizeof(uint32_t) + sizeof(uint64_t))
1010

11+
/*
12+
* Initialize a 'struct chunkfile' for writing _or_ reading a file
13+
* with the chunk format.
14+
*
15+
* If writing a file, supply a non-NULL 'struct hashfile *' that will
16+
* be used to write.
17+
*
18+
* If reading a file, use a NULL 'struct hashfile *' and then pass the
19+
* memory-mapped data to read_table_of_contents(). Afterwards, use the
20+
* pair_chunk() or read_chunk() methods to access chunks, as appropriate.
21+
*
22+
* DO NOT MIX THESE MODES. Use different 'struct chunkfile' instances
23+
* for reading and writing.
24+
*/
1125
struct chunkfile *init_chunkfile(struct hashfile *f);
1226
void free_chunkfile(struct chunkfile *cf);
1327
int get_num_chunks(struct chunkfile *cf);
@@ -18,4 +32,37 @@ void add_chunk(struct chunkfile *cf,
1832
chunk_write_fn fn);
1933
int write_chunkfile(struct chunkfile *cf, void *data);
2034

35+
/*
* Read the table of contents of a chunk-format file that is available
* in memory at 'mfile' ('mfile_size' bytes), and record the id, start
* and size of each chunk in 'cf'.
*
* The table begins 'toc_offset' bytes into 'mfile' and holds
* 'toc_length' entries of CHUNK_TOC_ENTRY_SIZE bytes each, followed
* by a terminating entry whose chunk id is zero.
*
* Returns 0 on success, or a non-zero value (after reporting an
* error) if the table is malformed.
*/
int read_table_of_contents(struct chunkfile *cf,
36+
const unsigned char *mfile,
37+
size_t mfile_size,
38+
uint64_t toc_offset,
39+
int toc_length);
40+
41+
#define CHUNK_NOT_FOUND (-2)
42+
43+
/*
44+
* Find 'chunk_id' in the given chunkfile and assign the
45+
* given pointer to the position in the mmap'd file where
46+
* that chunk begins.
47+
*
48+
* Returns CHUNK_NOT_FOUND if the chunk does not exist.
49+
*/
50+
int pair_chunk(struct chunkfile *cf,
51+
uint32_t chunk_id,
52+
const unsigned char **p);
53+
54+
/*
* Callback type for read_chunk(). It receives the position where the
* chunk begins, the size of the chunk in bytes, and the 'data' pointer
* that was handed to read_chunk(). Its return value becomes the
* return value of read_chunk().
*/
typedef int (*chunk_read_fn)(const unsigned char *chunk_start,
55+
size_t chunk_size, void *data);
56+
/*
57+
* Find 'chunk_id' in the given chunkfile and call the
58+
* given chunk_read_fn method with the information for
59+
* that chunk.
60+
*
61+
* Returns CHUNK_NOT_FOUND if the chunk does not exist.
62+
*/
63+
int read_chunk(struct chunkfile *cf,
64+
uint32_t chunk_id,
65+
chunk_read_fn fn,
66+
void *data);
67+
2168
#endif

0 commit comments

Comments
 (0)