Skip to content

Commit 468b322

Browse files
committed
Merge branch 'ds/multi-pack-verify'
"git multi-pack-index" learned to detect corruption in the .midx file it uses, and this feature has been integrated into "git fsck". * ds/multi-pack-verify: fsck: verify multi-pack-index multi-pack-index: report progress during 'verify' multi-pack-index: verify object offsets multi-pack-index: fix 32-bit vs 64-bit size check multi-pack-index: verify oid lookup order multi-pack-index: verify oid fanout order multi-pack-index: verify missing pack multi-pack-index: verify packname order multi-pack-index: verify corrupt chunk lookup table multi-pack-index: verify bad header multi-pack-index: add 'verify' verb
2 parents d555663 + 66ec039 commit 468b322

File tree

6 files changed

+262
-20
lines changed

6 files changed

+262
-20
lines changed

Documentation/git-multi-pack-index.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@ write::
2727
When given as the verb, write a new MIDX file to
2828
`<dir>/pack/multi-pack-index`.
2929

30+
verify::
31+
When given as the verb, verify the contents of the MIDX file
32+
at `<dir>/pack/multi-pack-index`.
33+
3034

3135
EXAMPLES
3236
--------
@@ -43,6 +47,12 @@ $ git multi-pack-index write
4347
$ git multi-pack-index --object-dir <alt> write
4448
-----------------------------------------------
4549

50+
* Verify the MIDX file for the packfiles in the current .git folder.
51+
+
52+
-----------------------------------------------
53+
$ git multi-pack-index verify
54+
-----------------------------------------------
55+
4656

4757
SEE ALSO
4858
--------

builtin/fsck.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -848,5 +848,23 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
848848
}
849849
}
850850

851+
if (!git_config_get_bool("core.multipackindex", &i) && i) {
852+
struct child_process midx_verify = CHILD_PROCESS_INIT;
853+
const char *midx_argv[] = { "multi-pack-index", "verify", NULL, NULL, NULL };
854+
855+
midx_verify.argv = midx_argv;
856+
midx_verify.git_cmd = 1;
857+
if (run_command(&midx_verify))
858+
errors_found |= ERROR_COMMIT_GRAPH;
859+
860+
prepare_alt_odb(the_repository);
861+
for (alt = the_repository->objects->alt_odb_list; alt; alt = alt->next) {
862+
midx_argv[2] = "--object-dir";
863+
midx_argv[3] = alt->path;
864+
if (run_command(&midx_verify))
865+
errors_found |= ERROR_COMMIT_GRAPH;
866+
}
867+
}
868+
851869
return errors_found;
852870
}

builtin/multi-pack-index.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
#include "midx.h"
66

77
static char const * const builtin_multi_pack_index_usage[] = {
8-
N_("git multi-pack-index [--object-dir=<dir>] write"),
8+
N_("git multi-pack-index [--object-dir=<dir>] (write|verify)"),
99
NULL
1010
};
1111

@@ -42,6 +42,8 @@ int cmd_multi_pack_index(int argc, const char **argv,
4242

4343
if (!strcmp(argv[0], "write"))
4444
return write_midx_file(opts.object_dir);
45+
if (!strcmp(argv[0], "verify"))
46+
return verify_midx_file(opts.object_dir);
4547

4648
die(_("unrecognized verb: %s"), argv[0]);
4749
}

midx.c

Lines changed: 95 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include "object-store.h"
88
#include "sha1-lookup.h"
99
#include "midx.h"
10+
#include "progress.h"
1011

1112
#define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */
1213
#define MIDX_VERSION 1
@@ -76,24 +77,18 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local
7677
m->local = local;
7778

7879
m->signature = get_be32(m->data);
79-
if (m->signature != MIDX_SIGNATURE) {
80-
error(_("multi-pack-index signature 0x%08x does not match signature 0x%08x"),
80+
if (m->signature != MIDX_SIGNATURE)
81+
die(_("multi-pack-index signature 0x%08x does not match signature 0x%08x"),
8182
m->signature, MIDX_SIGNATURE);
82-
goto cleanup_fail;
83-
}
8483

8584
m->version = m->data[MIDX_BYTE_FILE_VERSION];
86-
if (m->version != MIDX_VERSION) {
87-
error(_("multi-pack-index version %d not recognized"),
85+
if (m->version != MIDX_VERSION)
86+
die(_("multi-pack-index version %d not recognized"),
8887
m->version);
89-
goto cleanup_fail;
90-
}
9188

9289
hash_version = m->data[MIDX_BYTE_HASH_VERSION];
93-
if (hash_version != MIDX_HASH_VERSION) {
94-
error(_("hash version %u does not match"), hash_version);
95-
goto cleanup_fail;
96-
}
90+
if (hash_version != MIDX_HASH_VERSION)
91+
die(_("hash version %u does not match"), hash_version);
9792
m->hash_len = MIDX_HASH_LEN;
9893

9994
m->num_chunks = m->data[MIDX_BYTE_NUM_CHUNKS];
@@ -106,6 +101,9 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local
106101
uint64_t chunk_offset = get_be64(m->data + MIDX_HEADER_SIZE + 4 +
107102
MIDX_CHUNKLOOKUP_WIDTH * i);
108103

104+
if (chunk_offset >= m->data_len)
105+
die(_("invalid chunk offset (too large)"));
106+
109107
switch (chunk_id) {
110108
case MIDX_CHUNKID_PACKNAMES:
111109
m->chunk_pack_names = m->data + chunk_offset;
@@ -160,12 +158,10 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local
160158

161159
cur_pack_name += strlen(cur_pack_name) + 1;
162160

163-
if (i && strcmp(m->pack_names[i], m->pack_names[i - 1]) <= 0) {
164-
error(_("multi-pack-index pack names out of order: '%s' before '%s'"),
161+
if (i && strcmp(m->pack_names[i], m->pack_names[i - 1]) <= 0)
162+
die(_("multi-pack-index pack names out of order: '%s' before '%s'"),
165163
m->pack_names[i - 1],
166164
m->pack_names[i]);
167-
goto cleanup_fail;
168-
}
169165
}
170166

171167
return m;
@@ -202,7 +198,8 @@ int prepare_midx_pack(struct multi_pack_index *m, uint32_t pack_int_id)
202198
struct strbuf pack_name = STRBUF_INIT;
203199

204200
if (pack_int_id >= m->num_packs)
205-
BUG("bad pack-int-id");
201+
die(_("bad pack-int-id: %u (%u total packs"),
202+
pack_int_id, m->num_packs);
206203

207204
if (m->packs[pack_int_id])
208205
return 0;
@@ -241,7 +238,7 @@ static off_t nth_midxed_offset(struct multi_pack_index *m, uint32_t pos)
241238
offset32 = get_be32(offset_data + sizeof(uint32_t));
242239

243240
if (m->chunk_large_offsets && offset32 & MIDX_LARGE_OFFSET_NEEDED) {
244-
if (sizeof(offset32) < sizeof(uint64_t))
241+
if (sizeof(off_t) < sizeof(uint64_t))
245242
die(_("multi-pack-index stores a 64-bit offset, but off_t is too small"));
246243

247244
offset32 ^= MIDX_LARGE_OFFSET_NEEDED;
@@ -928,3 +925,83 @@ void clear_midx_file(const char *object_dir)
928925

929926
free(midx);
930927
}
928+
929+
static int verify_midx_error;
930+
931+
static void midx_report(const char *fmt, ...)
932+
{
933+
va_list ap;
934+
verify_midx_error = 1;
935+
va_start(ap, fmt);
936+
vfprintf(stderr, fmt, ap);
937+
fprintf(stderr, "\n");
938+
va_end(ap);
939+
}
940+
941+
int verify_midx_file(const char *object_dir)
942+
{
943+
uint32_t i;
944+
struct progress *progress = NULL;
945+
struct multi_pack_index *m = load_multi_pack_index(object_dir, 1);
946+
verify_midx_error = 0;
947+
948+
if (!m)
949+
return 0;
950+
951+
for (i = 0; i < m->num_packs; i++) {
952+
if (prepare_midx_pack(m, i))
953+
midx_report("failed to load pack in position %d", i);
954+
}
955+
956+
for (i = 0; i < 255; i++) {
957+
uint32_t oid_fanout1 = ntohl(m->chunk_oid_fanout[i]);
958+
uint32_t oid_fanout2 = ntohl(m->chunk_oid_fanout[i + 1]);
959+
960+
if (oid_fanout1 > oid_fanout2)
961+
midx_report(_("oid fanout out of order: fanout[%d] = %"PRIx32" > %"PRIx32" = fanout[%d]"),
962+
i, oid_fanout1, oid_fanout2, i + 1);
963+
}
964+
965+
for (i = 0; i < m->num_objects - 1; i++) {
966+
struct object_id oid1, oid2;
967+
968+
nth_midxed_object_oid(&oid1, m, i);
969+
nth_midxed_object_oid(&oid2, m, i + 1);
970+
971+
if (oidcmp(&oid1, &oid2) >= 0)
972+
midx_report(_("oid lookup out of order: oid[%d] = %s >= %s = oid[%d]"),
973+
i, oid_to_hex(&oid1), oid_to_hex(&oid2), i + 1);
974+
}
975+
976+
progress = start_progress(_("Verifying object offsets"), m->num_objects);
977+
for (i = 0; i < m->num_objects; i++) {
978+
struct object_id oid;
979+
struct pack_entry e;
980+
off_t m_offset, p_offset;
981+
982+
nth_midxed_object_oid(&oid, m, i);
983+
if (!fill_midx_entry(&oid, &e, m)) {
984+
midx_report(_("failed to load pack entry for oid[%d] = %s"),
985+
i, oid_to_hex(&oid));
986+
continue;
987+
}
988+
989+
if (open_pack_index(e.p)) {
990+
midx_report(_("failed to load pack-index for packfile %s"),
991+
e.p->pack_name);
992+
break;
993+
}
994+
995+
m_offset = e.offset;
996+
p_offset = find_pack_entry_one(oid.hash, e.p);
997+
998+
if (m_offset != p_offset)
999+
midx_report(_("incorrect object offset for oid[%d] = %s: %"PRIx64" != %"PRIx64),
1000+
i, oid_to_hex(&oid), m_offset, p_offset);
1001+
1002+
display_progress(progress, i + 1);
1003+
}
1004+
stop_progress(&progress);
1005+
1006+
return verify_midx_error;
1007+
}

midx.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,5 +43,6 @@ int prepare_multi_pack_index_one(struct repository *r, const char *object_dir, i
4343

4444
int write_midx_file(const char *object_dir);
4545
void clear_midx_file(const char *object_dir);
46+
int verify_midx_file(const char *object_dir);
4647

4748
#endif

t/t5319-multi-pack-index.sh

Lines changed: 135 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,125 @@ test_expect_success 'write midx with twelve packs' '
150150

151151
compare_results_with_midx "twelve packs"
152152

153+
test_expect_success 'verify multi-pack-index success' '
154+
git multi-pack-index verify --object-dir=$objdir
155+
'
156+
157+
# usage: corrupt_midx_and_verify <pos> <data> <objdir> <string> [<command>]
#
# Overwrite the multi-pack-index under <objdir> at byte offset <pos> with
# <data> (used as a printf format; defaults to "\0"), then require that
# <command> fails and that its stderr contains <string>. When <command> is
# omitted, "git multi-pack-index verify --object-dir=<objdir>" is run.
# The pristine file is copied to midx-backup and moved back when the
# enclosing test finishes.
corrupt_midx_and_verify() {
	POS=$1 &&
	DATA="${2:-\0}" &&
	OBJDIR=$3 &&
	GREPSTR="$4" &&
	COMMAND="$5" &&
	if test -z "$COMMAND"
	then
		COMMAND="git multi-pack-index verify --object-dir=$OBJDIR"
	fi &&
	FILE=$OBJDIR/pack/multi-pack-index &&
	chmod a+w "$FILE" &&
	test_when_finished mv midx-backup "$FILE" &&
	cp "$FILE" midx-backup &&
	printf "$DATA" | dd of="$FILE" bs=1 seek="$POS" conv=notrunc &&
	# $COMMAND is deliberately unquoted: it holds a command line that
	# must be split into words.
	test_must_fail $COMMAND 2>test_err &&
	# Drop lines starting with "+" (presumably shell trace output)
	# before searching for the expected message.
	grep -v "^+" test_err >err &&
	test_i18ngrep "$GREPSTR" err
}
177+
178+
test_expect_success 'verify bad signature' '
179+
corrupt_midx_and_verify 0 "\00" $objdir \
180+
"multi-pack-index signature"
181+
'
182+
183+
HASH_LEN=20
184+
NUM_OBJECTS=74
185+
MIDX_BYTE_VERSION=4
186+
MIDX_BYTE_OID_VERSION=5
187+
MIDX_BYTE_CHUNK_COUNT=6
188+
MIDX_HEADER_SIZE=12
189+
MIDX_BYTE_CHUNK_ID=$MIDX_HEADER_SIZE
190+
MIDX_BYTE_CHUNK_OFFSET=$(($MIDX_HEADER_SIZE + 4))
191+
MIDX_NUM_CHUNKS=5
192+
MIDX_CHUNK_LOOKUP_WIDTH=12
193+
MIDX_OFFSET_PACKNAMES=$(($MIDX_HEADER_SIZE + \
194+
$MIDX_NUM_CHUNKS * $MIDX_CHUNK_LOOKUP_WIDTH))
195+
MIDX_BYTE_PACKNAME_ORDER=$(($MIDX_OFFSET_PACKNAMES + 2))
196+
MIDX_OFFSET_OID_FANOUT=$(($MIDX_OFFSET_PACKNAMES + 652))
197+
MIDX_OID_FANOUT_WIDTH=4
198+
MIDX_BYTE_OID_FANOUT_ORDER=$((MIDX_OFFSET_OID_FANOUT + 250 * $MIDX_OID_FANOUT_WIDTH + 1))
199+
MIDX_OFFSET_OID_LOOKUP=$(($MIDX_OFFSET_OID_FANOUT + 256 * $MIDX_OID_FANOUT_WIDTH))
200+
MIDX_BYTE_OID_LOOKUP=$(($MIDX_OFFSET_OID_LOOKUP + 16 * $HASH_LEN))
201+
MIDX_OFFSET_OBJECT_OFFSETS=$(($MIDX_OFFSET_OID_LOOKUP + $NUM_OBJECTS * $HASH_LEN))
202+
MIDX_OFFSET_WIDTH=8
203+
MIDX_BYTE_PACK_INT_ID=$(($MIDX_OFFSET_OBJECT_OFFSETS + 16 * $MIDX_OFFSET_WIDTH + 2))
204+
MIDX_BYTE_OFFSET=$(($MIDX_OFFSET_OBJECT_OFFSETS + 16 * $MIDX_OFFSET_WIDTH + 6))
205+
206+
test_expect_success 'verify bad version' '
207+
corrupt_midx_and_verify $MIDX_BYTE_VERSION "\00" $objdir \
208+
"multi-pack-index version"
209+
'
210+
211+
test_expect_success 'verify bad OID version' '
212+
corrupt_midx_and_verify $MIDX_BYTE_OID_VERSION "\02" $objdir \
213+
"hash version"
214+
'
215+
216+
test_expect_success 'verify truncated chunk count' '
217+
corrupt_midx_and_verify $MIDX_BYTE_CHUNK_COUNT "\01" $objdir \
218+
"missing required"
219+
'
220+
221+
test_expect_success 'verify extended chunk count' '
222+
corrupt_midx_and_verify $MIDX_BYTE_CHUNK_COUNT "\07" $objdir \
223+
"terminating multi-pack-index chunk id appears earlier than expected"
224+
'
225+
226+
test_expect_success 'verify missing required chunk' '
227+
corrupt_midx_and_verify $MIDX_BYTE_CHUNK_ID "\01" $objdir \
228+
"missing required"
229+
'
230+
231+
test_expect_success 'verify invalid chunk offset' '
232+
corrupt_midx_and_verify $MIDX_BYTE_CHUNK_OFFSET "\01" $objdir \
233+
"invalid chunk offset (too large)"
234+
'
235+
236+
test_expect_success 'verify packnames out of order' '
237+
corrupt_midx_and_verify $MIDX_BYTE_PACKNAME_ORDER "z" $objdir \
238+
"pack names out of order"
239+
'
240+
241+
# Changing a byte of the first pack name to "a" keeps the names sorted but
# makes the name refer to a pack that cannot be loaded.
test_expect_success 'verify missing pack' '
	corrupt_midx_and_verify $MIDX_BYTE_PACKNAME_ORDER "a" $objdir \
		"failed to load pack"
'
245+
246+
test_expect_success 'verify oid fanout out of order' '
247+
corrupt_midx_and_verify $MIDX_BYTE_OID_FANOUT_ORDER "\01" $objdir \
248+
"oid fanout out of order"
249+
'
250+
251+
test_expect_success 'verify oid lookup out of order' '
252+
corrupt_midx_and_verify $MIDX_BYTE_OID_LOOKUP "\00" $objdir \
253+
"oid lookup out of order"
254+
'
255+
256+
test_expect_success 'verify incorrect pack-int-id' '
257+
corrupt_midx_and_verify $MIDX_BYTE_PACK_INT_ID "\07" $objdir \
258+
"bad pack-int-id"
259+
'
260+
261+
test_expect_success 'verify incorrect offset' '
262+
corrupt_midx_and_verify $MIDX_BYTE_OFFSET "\07" $objdir \
263+
"incorrect object offset"
264+
'
265+
266+
test_expect_success 'git-fsck incorrect offset' '
267+
corrupt_midx_and_verify $MIDX_BYTE_OFFSET "\07" $objdir \
268+
"incorrect object offset" \
269+
"git -c core.multipackindex=true fsck"
270+
'
271+
153272
test_expect_success 'repack removes multi-pack-index' '
154273
test_path_is_file $objdir/pack/multi-pack-index &&
155274
git repack -adf &&
@@ -187,7 +306,6 @@ test_expect_success 'multi-pack-index in an alternate' '
187306

188307
compare_results_with_midx "with alternate (remote midx)"
189308

190-
191309
# usage: corrupt_data <file> <pos> [<data>]
192310
corrupt_data () {
193311
file=$1
@@ -214,4 +332,20 @@ test_expect_success 'force some 64-bit offsets with pack-objects' '
214332
midx_read_expect 1 63 5 objects64 " large-offsets"
215333
'
216334

335+
test_expect_success 'verify multi-pack-index with 64-bit offsets' '
336+
git multi-pack-index verify --object-dir=objects64
337+
'
338+
339+
NUM_OBJECTS=63
340+
MIDX_OFFSET_OID_FANOUT=$((MIDX_OFFSET_PACKNAMES + 54))
341+
MIDX_OFFSET_OID_LOOKUP=$((MIDX_OFFSET_OID_FANOUT + 256 * $MIDX_OID_FANOUT_WIDTH))
342+
MIDX_OFFSET_OBJECT_OFFSETS=$(($MIDX_OFFSET_OID_LOOKUP + $NUM_OBJECTS * $HASH_LEN))
343+
MIDX_OFFSET_LARGE_OFFSETS=$(($MIDX_OFFSET_OBJECT_OFFSETS + $NUM_OBJECTS * $MIDX_OFFSET_WIDTH))
344+
MIDX_BYTE_LARGE_OFFSET=$(($MIDX_OFFSET_LARGE_OFFSETS + 3))
345+
346+
test_expect_success 'verify incorrect 64-bit offset' '
347+
corrupt_midx_and_verify $MIDX_BYTE_LARGE_OFFSET "\07" objects64 \
348+
"incorrect object offset"
349+
'
350+
217351
test_done

0 commit comments

Comments
 (0)