Skip to content

Commit b80236d

Browse files
ttaylorr authored and gitster committed
midx: support reading incremental MIDX chains
Now that the MIDX machinery's internals have been taught to understand incremental MIDXs over the previous handful of commits, the MIDX machinery itself can begin reading incremental MIDXs. (Note that while the on-disk format for incremental MIDXs has been defined, the writing end has not been implemented. This will take place in the commit after next.) The core of this change involves following the order specified in the MIDX chain in reverse and opening up MIDXs in the chain one-by-one, adding them to the previous layer's `->base_midx` pointer at each step. In order to implement this, the `load_multi_pack_index()` function is taught to call a new `load_multi_pack_index_chain()` function if loading a non-incremental MIDX failed via `load_multi_pack_index_one()`. When loading a MIDX chain, `load_midx_chain_fd_st()` reads each line in the file one-by-one and dispatches calls to `load_multi_pack_index_one()` to read each layer of the MIDX chain. When a layer was successfully read, it is added to the MIDX chain by calling `add_midx_to_chain()` which validates the contents of the `BASE` chunk, performs some bounds checks on the number of combined packs and objects, and attaches the new MIDX by assigning its `base_midx` pointer to the existing part of the chain. As a supplement to this, introduce a new mode in the test-read-midx test-tool which allows us to read the information for a specific MIDX in the chain by specifying its trailing checksum via the command-line arguments like so: $ test-tool read-midx .git/objects [checksum] Signed-off-by: Taylor Blau <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 97fd770 commit b80236d

File tree

4 files changed

+201
-19
lines changed

4 files changed

+201
-19
lines changed

midx.c

Lines changed: 174 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -91,39 +91,36 @@ static int midx_read_object_offsets(const unsigned char *chunk_start,
9191

9292
#define MIDX_MIN_SIZE (MIDX_HEADER_SIZE + the_hash_algo->rawsz)
9393

94-
struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local)
94+
static struct multi_pack_index *load_multi_pack_index_one(const char *object_dir,
95+
const char *midx_name,
96+
int local)
9597
{
9698
struct multi_pack_index *m = NULL;
9799
int fd;
98100
struct stat st;
99101
size_t midx_size;
100102
void *midx_map = NULL;
101103
uint32_t hash_version;
102-
struct strbuf midx_name = STRBUF_INIT;
103104
uint32_t i;
104105
const char *cur_pack_name;
105106
struct chunkfile *cf = NULL;
106107

107-
get_midx_filename(&midx_name, object_dir);
108-
109-
fd = git_open(midx_name.buf);
108+
fd = git_open(midx_name);
110109

111110
if (fd < 0)
112111
goto cleanup_fail;
113112
if (fstat(fd, &st)) {
114-
error_errno(_("failed to read %s"), midx_name.buf);
113+
error_errno(_("failed to read %s"), midx_name);
115114
goto cleanup_fail;
116115
}
117116

118117
midx_size = xsize_t(st.st_size);
119118

120119
if (midx_size < MIDX_MIN_SIZE) {
121-
error(_("multi-pack-index file %s is too small"), midx_name.buf);
120+
error(_("multi-pack-index file %s is too small"), midx_name);
122121
goto cleanup_fail;
123122
}
124123

125-
strbuf_release(&midx_name);
126-
127124
midx_map = xmmap(NULL, midx_size, PROT_READ, MAP_PRIVATE, fd, 0);
128125
close(fd);
129126

@@ -213,7 +210,6 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local
213210

214211
cleanup_fail:
215212
free(m);
216-
strbuf_release(&midx_name);
217213
free_chunkfile(cf);
218214
if (midx_map)
219215
munmap(midx_map, midx_size);
@@ -222,6 +218,173 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local
222218
return NULL;
223219
}
224220

221+
/*
 * Append to `buf` the path of the directory that holds the pieces of an
 * incremental multi-pack-index chain for `object_dir`, i.e.
 * "<object_dir>/pack/multi-pack-index.d".
 *
 * `buf` is only appended to; any existing contents are kept.
 */
void get_midx_chain_dirname(struct strbuf *buf, const char *object_dir)
{
	strbuf_addf(buf,
		    "%s/pack/multi-pack-index.d",
		    object_dir);
}
225+
226+
/*
 * Append to `buf` the path of the chain file that lists the layers of an
 * incremental multi-pack-index: the chain directory for `object_dir`
 * followed by "/multi-pack-index-chain".
 */
void get_midx_chain_filename(struct strbuf *buf, const char *object_dir)
{
	const char *chain_basename = "/multi-pack-index-chain";

	get_midx_chain_dirname(buf, object_dir);
	strbuf_addstr(buf, chain_basename);
}
231+
232+
/*
 * Append to `buf` the path of a single file belonging to an incremental
 * multi-pack-index layer:
 *
 *   <chain directory>/multi-pack-index-<hex of hash>.<ext>
 *
 * `hash` is rendered with hash_to_hex(); `ext` is appended verbatim after
 * the dot (callers in this file pass MIDX_EXT_MIDX).
 */
void get_split_midx_filename_ext(struct strbuf *buf, const char *object_dir,
				 const unsigned char *hash, const char *ext)
{
	const char *hex;

	get_midx_chain_dirname(buf, object_dir);

	hex = hash_to_hex(hash);
	strbuf_addf(buf, "/multi-pack-index-%s.%s", hex, ext);
}
238+
239+
static int open_multi_pack_index_chain(const char *chain_file,
240+
int *fd, struct stat *st)
241+
{
242+
*fd = git_open(chain_file);
243+
if (*fd < 0)
244+
return 0;
245+
if (fstat(*fd, st)) {
246+
close(*fd);
247+
return 0;
248+
}
249+
if (st->st_size < the_hash_algo->hexsz) {
250+
close(*fd);
251+
if (!st->st_size) {
252+
/* treat empty files the same as missing */
253+
errno = ENOENT;
254+
} else {
255+
warning(_("multi-pack-index chain file too small"));
256+
errno = EINVAL;
257+
}
258+
return 0;
259+
}
260+
return 1;
261+
}
262+
263+
static int add_midx_to_chain(struct multi_pack_index *midx,
264+
struct multi_pack_index *midx_chain,
265+
struct object_id *oids,
266+
int n)
267+
{
268+
if (midx_chain) {
269+
if (unsigned_add_overflows(midx_chain->num_packs,
270+
midx_chain->num_packs_in_base)) {
271+
warning(_("pack count in base MIDX too high: %"PRIuMAX),
272+
(uintmax_t)midx_chain->num_packs_in_base);
273+
return 0;
274+
}
275+
if (unsigned_add_overflows(midx_chain->num_objects,
276+
midx_chain->num_objects_in_base)) {
277+
warning(_("object count in base MIDX too high: %"PRIuMAX),
278+
(uintmax_t)midx_chain->num_objects_in_base);
279+
return 0;
280+
}
281+
midx->num_packs_in_base = midx_chain->num_packs +
282+
midx_chain->num_packs_in_base;
283+
midx->num_objects_in_base = midx_chain->num_objects +
284+
midx_chain->num_objects_in_base;
285+
}
286+
287+
midx->base_midx = midx_chain;
288+
midx->has_chain = 1;
289+
290+
return 1;
291+
}
292+
293+
static struct multi_pack_index *load_midx_chain_fd_st(const char *object_dir,
294+
int local,
295+
int fd, struct stat *st,
296+
int *incomplete_chain)
297+
{
298+
struct multi_pack_index *midx_chain = NULL;
299+
struct strbuf buf = STRBUF_INIT;
300+
struct object_id *layers = NULL;
301+
int valid = 1;
302+
uint32_t i, count;
303+
FILE *fp = xfdopen(fd, "r");
304+
305+
count = st->st_size / (the_hash_algo->hexsz + 1);
306+
CALLOC_ARRAY(layers, count);
307+
308+
for (i = 0; i < count; i++) {
309+
struct multi_pack_index *m;
310+
311+
if (strbuf_getline_lf(&buf, fp) == EOF)
312+
break;
313+
314+
if (get_oid_hex(buf.buf, &layers[i])) {
315+
warning(_("invalid multi-pack-index chain: line '%s' "
316+
"not a hash"),
317+
buf.buf);
318+
valid = 0;
319+
break;
320+
}
321+
322+
valid = 0;
323+
324+
strbuf_reset(&buf);
325+
get_split_midx_filename_ext(&buf, object_dir, layers[i].hash,
326+
MIDX_EXT_MIDX);
327+
m = load_multi_pack_index_one(object_dir, buf.buf, local);
328+
329+
if (m) {
330+
if (add_midx_to_chain(m, midx_chain, layers, i)) {
331+
midx_chain = m;
332+
valid = 1;
333+
} else {
334+
close_midx(m);
335+
}
336+
}
337+
if (!valid) {
338+
warning(_("unable to find all multi-pack index files"));
339+
break;
340+
}
341+
}
342+
343+
free(layers);
344+
fclose(fp);
345+
strbuf_release(&buf);
346+
347+
*incomplete_chain = !valid;
348+
return midx_chain;
349+
}
350+
351+
static struct multi_pack_index *load_multi_pack_index_chain(const char *object_dir,
352+
int local)
353+
{
354+
struct strbuf chain_file = STRBUF_INIT;
355+
struct stat st;
356+
int fd;
357+
struct multi_pack_index *m = NULL;
358+
359+
get_midx_chain_filename(&chain_file, object_dir);
360+
if (open_multi_pack_index_chain(chain_file.buf, &fd, &st)) {
361+
int incomplete;
362+
/* ownership of fd is taken over by load function */
363+
m = load_midx_chain_fd_st(object_dir, local, fd, &st,
364+
&incomplete);
365+
}
366+
367+
strbuf_release(&chain_file);
368+
return m;
369+
}
370+
371+
struct multi_pack_index *load_multi_pack_index(const char *object_dir,
372+
int local)
373+
{
374+
struct strbuf midx_name = STRBUF_INIT;
375+
struct multi_pack_index *m;
376+
377+
get_midx_filename(&midx_name, object_dir);
378+
379+
m = load_multi_pack_index_one(object_dir, midx_name.buf, local);
380+
if (!m)
381+
m = load_multi_pack_index_chain(object_dir, local);
382+
383+
strbuf_release(&midx_name);
384+
385+
return m;
386+
}
387+
225388
void close_midx(struct multi_pack_index *m)
226389
{
227390
uint32_t i;
@@ -230,6 +393,7 @@ void close_midx(struct multi_pack_index *m)
230393
return;
231394

232395
close_midx(m->next);
396+
close_midx(m->base_midx);
233397

234398
munmap((unsigned char *)m->data, m->data_len);
235399

midx.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ struct bitmapped_pack;
2424
#define MIDX_CHUNKID_OBJECTOFFSETS 0x4f4f4646 /* "OOFF" */
2525
#define MIDX_CHUNKID_LARGEOFFSETS 0x4c4f4646 /* "LOFF" */
2626
#define MIDX_CHUNKID_REVINDEX 0x52494458 /* "RIDX" */
27+
#define MIDX_CHUNKID_BASE 0x42415345 /* "BASE" */
2728
#define MIDX_CHUNK_OFFSET_WIDTH (2 * sizeof(uint32_t))
2829
#define MIDX_LARGE_OFFSET_NEEDED 0x80000000
2930

@@ -50,6 +51,7 @@ struct multi_pack_index {
5051
int preferred_pack_idx;
5152

5253
int local;
54+
int has_chain;
5355

5456
const unsigned char *chunk_pack_names;
5557
size_t chunk_pack_names_len;
@@ -80,11 +82,16 @@ struct multi_pack_index {
8082

8183
#define MIDX_EXT_REV "rev"
8284
#define MIDX_EXT_BITMAP "bitmap"
85+
#define MIDX_EXT_MIDX "midx"
8386

8487
const unsigned char *get_midx_checksum(struct multi_pack_index *m);
8588
void get_midx_filename(struct strbuf *out, const char *object_dir);
8689
void get_midx_filename_ext(struct strbuf *out, const char *object_dir,
8790
const unsigned char *hash, const char *ext);
91+
void get_midx_chain_dirname(struct strbuf *buf, const char *object_dir);
92+
void get_midx_chain_filename(struct strbuf *buf, const char *object_dir);
93+
void get_split_midx_filename_ext(struct strbuf *buf, const char *object_dir,
94+
const unsigned char *hash, const char *ext);
8895

8996
struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local);
9097
int prepare_midx_pack(struct repository *r, struct multi_pack_index *m, uint32_t pack_int_id);

packfile.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -880,7 +880,8 @@ static void prepare_pack(const char *full_name, size_t full_name_len,
880880
if (!report_garbage)
881881
return;
882882

883-
if (!strcmp(file_name, "multi-pack-index"))
883+
if (!strcmp(file_name, "multi-pack-index") ||
884+
!strcmp(file_name, "multi-pack-index.d"))
884885
return;
885886
if (starts_with(file_name, "multi-pack-index") &&
886887
(ends_with(file_name, ".bitmap") || ends_with(file_name, ".rev")))
@@ -1064,7 +1065,7 @@ struct packed_git *get_all_packs(struct repository *r)
10641065
prepare_packed_git(r);
10651066
for (m = r->objects->multi_pack_index; m; m = m->next) {
10661067
uint32_t i;
1067-
for (i = 0; i < m->num_packs; i++)
1068+
for (i = 0; i < m->num_packs + m->num_packs_in_base; i++)
10681069
prepare_midx_pack(r, m, i);
10691070
}
10701071

t/helper/test-read-midx.c

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,10 @@
99
#include "packfile.h"
1010
#include "setup.h"
1111
#include "gettext.h"
12+
#include "pack-revindex.h"
1213

13-
static int read_midx_file(const char *object_dir, int show_objects)
14+
static int read_midx_file(const char *object_dir, const char *checksum,
15+
int show_objects)
1416
{
1517
uint32_t i;
1618
struct multi_pack_index *m;
@@ -21,6 +23,13 @@ static int read_midx_file(const char *object_dir, int show_objects)
2123
if (!m)
2224
return 1;
2325

26+
if (checksum) {
27+
while (m && strcmp(hash_to_hex(get_midx_checksum(m)), checksum))
28+
m = m->base_midx;
29+
if (!m)
30+
return 1;
31+
}
32+
2433
printf("header: %08x %d %d %d %d\n",
2534
m->signature,
2635
m->version,
@@ -54,7 +63,8 @@ static int read_midx_file(const char *object_dir, int show_objects)
5463
struct pack_entry e;
5564

5665
for (i = 0; i < m->num_objects; i++) {
57-
nth_midxed_object_oid(&oid, m, i);
66+
nth_midxed_object_oid(&oid, m,
67+
i + m->num_objects_in_base);
5868
fill_midx_entry(the_repository, &oid, &e, m);
5969

6070
printf("%s %"PRIu64"\t%s\n",
@@ -111,7 +121,7 @@ static int read_midx_bitmapped_packs(const char *object_dir)
111121
if (!midx)
112122
return 1;
113123

114-
for (i = 0; i < midx->num_packs; i++) {
124+
for (i = 0; i < midx->num_packs + midx->num_packs_in_base; i++) {
115125
if (nth_bitmapped_pack(the_repository, midx, &pack, i) < 0)
116126
return 1;
117127

@@ -127,16 +137,16 @@ static int read_midx_bitmapped_packs(const char *object_dir)
127137

128138
int cmd__read_midx(int argc, const char **argv)
129139
{
130-
if (!(argc == 2 || argc == 3))
131-
usage("read-midx [--show-objects|--checksum|--preferred-pack|--bitmap] <object-dir>");
140+
if (!(argc == 2 || argc == 3 || argc == 4))
141+
usage("read-midx [--show-objects|--checksum|--preferred-pack|--bitmap] <object-dir> <checksum>");
132142

133143
if (!strcmp(argv[1], "--show-objects"))
134-
return read_midx_file(argv[2], 1);
144+
return read_midx_file(argv[2], argv[3], 1);
135145
else if (!strcmp(argv[1], "--checksum"))
136146
return read_midx_checksum(argv[2]);
137147
else if (!strcmp(argv[1], "--preferred-pack"))
138148
return read_midx_preferred_pack(argv[2]);
139149
else if (!strcmp(argv[1], "--bitmap"))
140150
return read_midx_bitmapped_packs(argv[2]);
141-
return read_midx_file(argv[1], 0);
151+
return read_midx_file(argv[1], argv[2], 0);
142152
}

0 commit comments

Comments
 (0)