Skip to content

Commit 6e42d9a

Browse files
derrickstolee authored and gitster committed
commit-graph: implement file format version 2
The commit-graph file format had some shortcomings which we now correct:

 1. The hash algorithm was determined by a single byte, instead of the 4-byte format identifier.

 2. There was no way to update the reachability index we used. We currently only support generation numbers, but that will change in the future.

 3. Git did not fail with error if the unused eighth byte was non-zero, so we could not use that to indicate an incremental file format without breaking compatibility across versions.

The new format modifies the header of the commit-graph to solve these problems. We use the 4-byte hash format id, freeing up a byte in our 32-bit alignment to introduce a reachability index version. We can also fail to read the commit-graph if the eighth byte is non-zero.

Update the 'git commit-graph read' subcommand to display the new data, and check this output in the test that explicitly writes a v2 commit-graph file.

While we converted the existing 'verify' tests to use a version 1 file to avoid recalculating data offsets, add explicit 'verify' tests on a version 2 file that corrupt the new header values.

Signed-off-by: Derrick Stolee <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent ac00247 commit 6e42d9a

File tree

4 files changed

+134
-9
lines changed

4 files changed

+134
-9
lines changed

Documentation/technical/commit-graph-format.txt

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,22 @@ and hash type.
3131

3232
All 4-byte numbers are in network order.
3333

34+
There are two versions available, 1 and 2. These currently differ only in
35+
the header.
36+
3437
HEADER:
3538

39+
All commit-graph files use the first five bytes for the same purpose.
40+
3641
4-byte signature:
3742
The signature is: {'C', 'G', 'P', 'H'}
3843

3944
1-byte version number:
40-
Currently, the only valid version is 1.
45+
Currently, the valid version numbers are 1 and 2.
46+
47+
The remainder of the header changes depending on the version.
48+
49+
Version 1:
4150

4251
1-byte Hash Version (1 = SHA-1)
4352
We infer the hash length (H) from this value.
@@ -47,6 +56,21 @@ HEADER:
4756
1-byte (reserved for later use)
4857
Current clients should ignore this value.
4958

59+
Version 2:
60+
61+
1-byte number (C) of "chunks"
62+
63+
1-byte reachability index version number:
64+
Currently, the only valid number is 1.
65+
66+
1-byte (reserved for later use)
67+
Current clients expect this value to be zero, and will not
68+
try to read the commit-graph file if it is non-zero.
69+
70+
4-byte format identifier for the hash algorithm:
71+
If this identifier does not agree with the repository's current
72+
hash algorithm, then the client will not read the commit graph.
73+
5074
CHUNK LOOKUP:
5175

5276
(C + 1) * 12 bytes listing the table of contents for the chunks:

builtin/commit-graph.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,11 @@ static int graph_read(int argc, const char **argv)
117117
*(unsigned char*)(graph->data + 5),
118118
*(unsigned char*)(graph->data + 6),
119119
*(unsigned char*)(graph->data + 7));
120+
121+
if (*(unsigned char *)(graph->data + 4) == 2)
122+
printf("hash algorithm: %X\n",
123+
get_be32(graph->data + 8));
124+
120125
printf("num_commits: %u\n", graph->num_commits);
121126
printf("chunks:");
122127

commit-graph.c

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,8 @@ struct commit_graph *parse_commit_graph(void *graph_map, int fd,
152152
uint64_t last_chunk_offset;
153153
uint32_t last_chunk_id;
154154
uint32_t graph_signature;
155-
unsigned char graph_version, hash_version;
155+
unsigned char graph_version, hash_version, reach_index_version;
156+
uint32_t hash_id;
156157

157158
if (!graph_map)
158159
return NULL;
@@ -170,7 +171,7 @@ struct commit_graph *parse_commit_graph(void *graph_map, int fd,
170171
}
171172

172173
graph_version = *(unsigned char*)(data + 4);
173-
if (graph_version != 1) {
174+
if (!graph_version || graph_version > 2) {
174175
error(_("commit-graph version %X does not match version %X"),
175176
graph_version, 1);
176177
return NULL;
@@ -190,6 +191,30 @@ struct commit_graph *parse_commit_graph(void *graph_map, int fd,
190191
graph->num_chunks = *(unsigned char*)(data + 6);
191192
chunk_lookup = data + 8;
192193
break;
194+
195+
case 2:
196+
graph->num_chunks = *(unsigned char *)(data + 5);
197+
198+
reach_index_version = *(unsigned char *)(data + 6);
199+
if (reach_index_version != 1) {
200+
error(_("unsupported reachability index version %d"),
201+
reach_index_version);
202+
return NULL;
203+
}
204+
205+
if (*(unsigned char*)(data + 7)) {
206+
error(_("unsupported value in commit-graph header"));
207+
return NULL;
208+
}
209+
210+
hash_id = get_be32(data + 8);
211+
if (hash_id != the_hash_algo->format_id) {
212+
error(_("commit-graph hash algorithm does not match current algorithm"));
213+
return NULL;
214+
}
215+
216+
chunk_lookup = data + 12;
217+
break;
193218
}
194219

195220
graph->hash_len = the_hash_algo->rawsz;
@@ -899,7 +924,7 @@ int write_commit_graph(const char *obj_dir,
899924

900925
if (!version)
901926
version = 1;
902-
if (version != 1) {
927+
if (version <= 0 || version > 2) {
903928
error(_("unsupported commit-graph version %d"),
904929
version);
905930
return 1;
@@ -1098,6 +1123,14 @@ int write_commit_graph(const char *obj_dir,
10981123
hashwrite_u8(f, 0); /* unused padding byte */
10991124
header_size = 8;
11001125
break;
1126+
1127+
case 2:
1128+
hashwrite_u8(f, num_chunks);
1129+
hashwrite_u8(f, 1); /* reachability index version */
1130+
hashwrite_u8(f, 0); /* unused padding byte */
1131+
hashwrite_be32(f, the_hash_algo->format_id);
1132+
header_size = 12;
1133+
break;
11011134
}
11021135

11031136
chunk_ids[0] = GRAPH_CHUNKID_OIDFANOUT;

t/t5318-commit-graph.sh

Lines changed: 68 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,13 @@ test_expect_success 'create commits and repack' '
3333
git repack
3434
'
3535

36-
graph_git_two_modes() {
36+
graph_git_two_modes () {
3737
git -c core.commitGraph=true $1 >output
3838
git -c core.commitGraph=false $1 >expect
3939
test_cmp expect output
4040
}
4141

42-
graph_git_behavior() {
42+
graph_git_behavior () {
4343
MSG=$1
4444
DIR=$2
4545
BRANCH=$3
@@ -56,7 +56,7 @@ graph_git_behavior() {
5656

5757
graph_git_behavior 'no graph' full commits/3 commits/1
5858

59-
graph_read_expect() {
59+
graph_read_expect () {
6060
OPTIONAL=""
6161
NUM_CHUNKS=3
6262
if test ! -z $2
@@ -320,6 +320,34 @@ test_expect_success 'replace-objects invalidates commit-graph' '
320320
)
321321
'
322322

323+
graph_read_expect_v2 () {
324+
OPTIONAL=""
325+
NUM_CHUNKS=3
326+
if test ! -z $2
327+
then
328+
OPTIONAL=" $2"
329+
NUM_CHUNKS=$((3 + $(echo "$2" | wc -w)))
330+
fi
331+
cat >expect <<- EOF
332+
header: 43475048 2 $NUM_CHUNKS 1 0
333+
hash algorithm: 73686131
334+
num_commits: $1
335+
chunks: oid_fanout oid_lookup commit_metadata$OPTIONAL
336+
EOF
337+
git commit-graph read >output &&
338+
test_cmp expect output
339+
}
340+
341+
test_expect_success 'write v2 graph' '
342+
cd "$TRASH_DIRECTORY/full" &&
343+
git commit-graph write --reachable --version=2 &&
344+
graph_read_expect_v2 11 extra_edges &&
345+
git commit-graph verify
346+
'
347+
348+
graph_git_behavior 'version 2 graph, commit 8 vs merge 2' full commits/8 merge/2
349+
graph_git_behavior 'version 2 graph, commit 8 vs merge 2' full commits/8 merge/2
350+
323351
# the verify tests below expect the commit-graph to contain
324352
# exactly the commits reachable from the commits/8 branch.
325353
# If the file changes the set of commits in the list, then the
@@ -392,7 +420,7 @@ corrupt_graph_verify() {
392420
# starting at <zero_pos>, then runs 'git commit-graph verify'
393421
# and places the output in the file 'err'. Test 'err' for
394422
# the given string.
395-
corrupt_graph_and_verify() {
423+
corrupt_graph_and_verify () {
396424
pos=$1
397425
data="${2:-\0}"
398426
grepstr=$3
@@ -424,10 +452,14 @@ test_expect_success 'detect bad signature' '
424452
'
425453

426454
test_expect_success 'detect bad version' '
427-
corrupt_graph_and_verify $GRAPH_BYTE_VERSION "\02" \
455+
corrupt_graph_and_verify $GRAPH_BYTE_VERSION "\03" \
428456
"graph version"
429457
'
430458

459+
test_expect_success 'detect version 2 with version 1 data' '
460+
corrupt_graph_and_verify $GRAPH_BYTE_VERSION "\02" \
461+
"reachability index version"
462+
'
431463
test_expect_success 'detect bad hash version' '
432464
corrupt_graph_and_verify $GRAPH_BYTE_HASH "\02" \
433465
"hash version"
@@ -532,6 +564,37 @@ test_expect_success 'git fsck (checks commit-graph)' '
532564
test_must_fail git fsck
533565
'
534566

567+
test_expect_success 'rewrite commmit-graph with version 2' '
568+
rm -f .git/objects/info/commit-graph &&
569+
git commit-graph write --reachable --version=2 &&
570+
git commit-graph verify
571+
'
572+
573+
GRAPH_BYTE_CHUNK_COUNT=5
574+
GRAPH_BYTE_REACH_INDEX=6
575+
GRAPH_BYTE_UNUSED=7
576+
GRAPH_BYTE_HASH=8
577+
578+
test_expect_success 'detect low chunk count (v2)' '
579+
corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\02" \
580+
"missing the .* chunk"
581+
'
582+
583+
test_expect_success 'detect incorrect reachability index' '
584+
corrupt_graph_and_verify $GRAPH_BYTE_REACH_INDEX "\03" \
585+
"reachability index version"
586+
'
587+
588+
test_expect_success 'detect non-zero unused byte' '
589+
corrupt_graph_and_verify $GRAPH_BYTE_UNUSED "\01" \
590+
"unsupported value"
591+
'
592+
593+
test_expect_success 'detect bad hash version (v2)' '
594+
corrupt_graph_and_verify $GRAPH_BYTE_HASH "\00" \
595+
"hash algorithm"
596+
'
597+
535598
test_expect_success 'setup non-the_repository tests' '
536599
rm -rf repo &&
537600
git init repo &&

0 commit comments

Comments
 (0)