Skip to content

Commit e337a04

Browse files
committed
index-pack: --verify
Given an existing .pack file and the .idx file that describes it, this new mode of operation reads and re-index the packfile and makes sure the existing .idx file matches the result byte-for-byte. All the objects in the .pack file are validated during this operation as well. Unlike verify-pack, which visits each object described in the .idx file in the SHA-1 order, index-pack efficiently exploits the delta-chain to avoid rebuilding the objects that are used as the base of deltified objects over and over again while validating the objects, resulting in much quicker verification of the .pack file and its .idx file. This version however cannot verify a .pack/.idx pair with a handcrafted v2 index that uses 64-bit offset representation for offsets that would fit within 31-bit. You can create such an .idx file by giving a custom offset to --index-version option to the command. Signed-off-by: Junio C Hamano <[email protected]>
1 parent ebcfb37 commit e337a04

File tree

6 files changed

+125
-17
lines changed

6 files changed

+125
-17
lines changed

builtin/index-pack.c

Lines changed: 40 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
#include "exec_cmd.h"
1212

1313
static const char index_pack_usage[] =
14-
"git index-pack [-v] [-o <index-file>] [ --keep | --keep=<msg> ] [--strict] (<pack-file> | --stdin [--fix-thin] [<pack-file>])";
14+
"git index-pack [-v] [-o <index-file>] [--keep | --keep=<msg>] [--verify] [--strict] (<pack-file> | --stdin [--fix-thin] [<pack-file>])";
1515

1616
struct object_entry
1717
{
@@ -891,9 +891,32 @@ static int git_index_pack_config(const char *k, const char *v, void *cb)
891891
return git_default_config(k, v, cb);
892892
}
893893

894+
static void read_idx_option(struct pack_idx_option *opts, const char *pack_name)
895+
{
896+
struct packed_git *p = add_packed_git(pack_name, strlen(pack_name), 1);
897+
898+
if (!p)
899+
die("Cannot open existing pack file '%s'", pack_name);
900+
if (open_pack_index(p))
901+
die("Cannot open existing pack idx file for '%s'", pack_name);
902+
903+
/* Read the attributes from the existing idx file */
904+
opts->version = p->index_version;
905+
906+
/*
907+
* Get rid of the idx file as we do not need it anymore.
908+
* NEEDSWORK: extract this bit from free_pack_by_name() in
909+
* sha1_file.c, perhaps? It shouldn't matter very much as we
910+
* know we haven't installed this pack (hence we never have
911+
* read anything from it).
912+
*/
913+
close_pack_index(p);
914+
free(p);
915+
}
916+
894917
int cmd_index_pack(int argc, const char **argv, const char *prefix)
895918
{
896-
int i, fix_thin_pack = 0;
919+
int i, fix_thin_pack = 0, verify = 0;
897920
const char *curr_pack, *curr_index;
898921
const char *index_name = NULL, *pack_name = NULL;
899922
const char *keep_name = NULL, *keep_msg = NULL;
@@ -922,6 +945,8 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
922945
fix_thin_pack = 1;
923946
} else if (!strcmp(arg, "--strict")) {
924947
strict = 1;
948+
} else if (!strcmp(arg, "--verify")) {
949+
verify = 1;
925950
} else if (!strcmp(arg, "--keep")) {
926951
keep_msg = "";
927952
} else if (!prefixcmp(arg, "--keep=")) {
@@ -988,6 +1013,12 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
9881013
strcpy(keep_name_buf + len - 5, ".keep");
9891014
keep_name = keep_name_buf;
9901015
}
1016+
if (verify) {
1017+
if (!index_name)
1018+
die("--verify with no packfile name given");
1019+
read_idx_option(&opts, index_name);
1020+
opts.flags |= WRITE_IDX_VERIFY;
1021+
}
9911022

9921023
curr_pack = open_pack_file(pack_name);
9931024
parse_pack_header();
@@ -1038,10 +1069,13 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
10381069
curr_index = write_idx_file(index_name, idx_objects, nr_objects, &opts, pack_sha1);
10391070
free(idx_objects);
10401071

1041-
final(pack_name, curr_pack,
1042-
index_name, curr_index,
1043-
keep_name, keep_msg,
1044-
pack_sha1);
1072+
if (!verify)
1073+
final(pack_name, curr_pack,
1074+
index_name, curr_index,
1075+
keep_name, keep_msg,
1076+
pack_sha1);
1077+
else
1078+
close(input_fd);
10451079
free(objects);
10461080
free(index_name_buf);
10471081
free(keep_name_buf);

csum-file.c

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,20 @@
1111
#include "progress.h"
1212
#include "csum-file.h"
1313

14-
static void flush(struct sha1file *f, void * buf, unsigned int count)
14+
static void flush(struct sha1file *f, void *buf, unsigned int count)
1515
{
16+
if (0 <= f->check_fd && count) {
17+
unsigned char check_buffer[8192];
18+
ssize_t ret = read_in_full(f->check_fd, check_buffer, count);
19+
20+
if (ret < 0)
21+
die_errno("%s: sha1 file read error", f->name);
22+
if (ret < count)
23+
die("%s: sha1 file truncated", f->name);
24+
if (memcmp(buf, check_buffer, count))
25+
die("sha1 file '%s' validation error", f->name);
26+
}
27+
1628
for (;;) {
1729
int ret = xwrite(f->fd, buf, count);
1830
if (ret > 0) {
@@ -59,6 +71,17 @@ int sha1close(struct sha1file *f, unsigned char *result, unsigned int flags)
5971
fd = 0;
6072
} else
6173
fd = f->fd;
74+
if (0 <= f->check_fd) {
75+
char discard;
76+
int cnt = read_in_full(f->check_fd, &discard, 1);
77+
if (cnt < 0)
78+
die_errno("%s: error when reading the tail of sha1 file",
79+
f->name);
80+
if (cnt)
81+
die("%s: sha1 file has trailing garbage", f->name);
82+
if (close(f->check_fd))
83+
die_errno("%s: sha1 file error on close", f->name);
84+
}
6285
free(f);
6386
return fd;
6487
}
@@ -101,10 +124,31 @@ struct sha1file *sha1fd(int fd, const char *name)
101124
return sha1fd_throughput(fd, name, NULL);
102125
}
103126

127+
struct sha1file *sha1fd_check(const char *name)
128+
{
129+
int sink, check;
130+
struct sha1file *f;
131+
132+
sink = open("/dev/null", O_WRONLY);
133+
if (sink < 0)
134+
return NULL;
135+
check = open(name, O_RDONLY);
136+
if (check < 0) {
137+
int saved_errno = errno;
138+
close(sink);
139+
errno = saved_errno;
140+
return NULL;
141+
}
142+
f = sha1fd(sink, name);
143+
f->check_fd = check;
144+
return f;
145+
}
146+
104147
struct sha1file *sha1fd_throughput(int fd, const char *name, struct progress *tp)
105148
{
106149
struct sha1file *f = xmalloc(sizeof(*f));
107150
f->fd = fd;
151+
f->check_fd = -1;
108152
f->offset = 0;
109153
f->total = 0;
110154
f->tp = tp;

csum-file.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ struct progress;
66
/* A SHA1-protected file */
77
struct sha1file {
88
int fd;
9+
int check_fd;
910
unsigned int offset;
1011
git_SHA_CTX ctx;
1112
off_t total;
@@ -21,6 +22,7 @@ struct sha1file {
2122
#define CSUM_FSYNC 2
2223

2324
extern struct sha1file *sha1fd(int fd, const char *name);
25+
extern struct sha1file *sha1fd_check(const char *name);
2426
extern struct sha1file *sha1fd_throughput(int fd, const char *name, struct progress *tp);
2527
extern int sha1close(struct sha1file *, unsigned char *, unsigned int);
2628
extern int sha1write(struct sha1file *, void *, unsigned int);

pack-write.c

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -47,17 +47,22 @@ const char *write_idx_file(const char *index_name, struct pack_idx_entry **objec
4747
else
4848
sorted_by_sha = list = last = NULL;
4949

50-
if (!index_name) {
51-
static char tmpfile[PATH_MAX];
52-
fd = odb_mkstemp(tmpfile, sizeof(tmpfile), "pack/tmp_idx_XXXXXX");
53-
index_name = xstrdup(tmpfile);
50+
if (opts->flags & WRITE_IDX_VERIFY) {
51+
assert(index_name);
52+
f = sha1fd_check(index_name);
5453
} else {
55-
unlink(index_name);
56-
fd = open(index_name, O_CREAT|O_EXCL|O_WRONLY, 0600);
54+
if (!index_name) {
55+
static char tmpfile[PATH_MAX];
56+
fd = odb_mkstemp(tmpfile, sizeof(tmpfile), "pack/tmp_idx_XXXXXX");
57+
index_name = xstrdup(tmpfile);
58+
} else {
59+
unlink(index_name);
60+
fd = open(index_name, O_CREAT|O_EXCL|O_WRONLY, 0600);
61+
}
62+
if (fd < 0)
63+
die_errno("unable to create '%s'", index_name);
64+
f = sha1fd(fd, index_name);
5765
}
58-
if (fd < 0)
59-
die_errno("unable to create '%s'", index_name);
60-
f = sha1fd(fd, index_name);
6166

6267
/* if last object's offset is >= 2^31 we should use index V2 */
6368
index_version = (last_obj_offset >> 31) ? 2 : opts->version;
@@ -142,7 +147,8 @@ const char *write_idx_file(const char *index_name, struct pack_idx_entry **objec
142147
}
143148

144149
sha1write(f, sha1, 20);
145-
sha1close(f, NULL, CSUM_FSYNC);
150+
sha1close(f, NULL, ((opts->flags & WRITE_IDX_VERIFY)
151+
? CSUM_CLOSE : CSUM_FSYNC));
146152
git_SHA1_Final(sha1, &ctx);
147153
return index_name;
148154
}

pack.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,10 @@ struct pack_header {
3535
#define PACK_IDX_SIGNATURE 0xff744f63 /* "\377tOc" */
3636

3737
struct pack_idx_option {
38+
unsigned flags;
39+
/* flag bits */
40+
#define WRITE_IDX_VERIFY 01
41+
3842
uint32_t version;
3943
uint32_t off32_limit;
4044
};

t/t5302-pack-index.sh

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,14 @@ test_expect_success \
6565
'cmp "test-1-${pack1}.idx" "1.idx" &&
6666
cmp "test-2-${pack2}.idx" "2.idx"'
6767

68+
test_expect_success 'index-pack --verify on index version 1' '
69+
git index-pack --verify "test-1-${pack1}.pack"
70+
'
71+
72+
test_expect_success 'index-pack --verify on index version 2' '
73+
git index-pack --verify "test-2-${pack2}.pack"
74+
'
75+
6876
test_expect_success \
6977
'index v2: force some 64-bit offsets with pack-objects' \
7078
'pack3=$(git pack-objects --index-version=2,0x40000 test-3 <obj-list)'
@@ -93,6 +101,16 @@ test_expect_success OFF64_T \
93101
'64-bit offsets: index-pack result should match pack-objects one' \
94102
'cmp "test-3-${pack3}.idx" "3.idx"'
95103

104+
test_expect_success OFF64_T 'index-pack --verify on 64-bit offset v2 (cheat)' '
105+
# This cheats by knowing which lower offset should still be encoded
106+
# in 64-bit representation.
107+
git index-pack --verify --index-version=2,0x40000 "test-3-${pack3}.pack"
108+
'
109+
110+
test_expect_failure OFF64_T 'index-pack --verify on 64-bit offset v2' '
111+
git index-pack --verify "test-3-${pack3}.pack"
112+
'
113+
96114
# returns the object number for given object in given pack index
97115
index_obj_nr()
98116
{

0 commit comments

Comments
 (0)