Skip to content

Commit a54cc52

Browse files
committed
Merge branch 'ds/commit-graph-gen-v2-fixes'
Fixes to the way generation number v2 in the commit-graph files are (not) handled.

* ds/commit-graph-gen-v2-fixes:
  commit-graph: declare bankruptcy on GDAT chunks
  commit-graph: fix generation number v2 overflow values
  commit-graph: start parsing generation v2 (again)
  commit-graph: fix ordering bug in generation numbers
  t5318: extract helpers to lib-commit-graph.sh
  test-read-graph: include extra post-parse info
2 parents a2fc9c3 + 6dbf4b8 commit a54cc52

File tree

8 files changed

+176
-58
lines changed

8 files changed

+176
-58
lines changed

Documentation/technical/commit-graph-format.txt

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ CHUNK DATA:
9393
2 bits of the lowest byte, storing the 33rd and 34th bit of the
9494
commit time.
9595

96-
Generation Data (ID: {'G', 'D', 'A', 'T' }) (N * 4 bytes) [Optional]
96+
Generation Data (ID: {'G', 'D', 'A', '2' }) (N * 4 bytes) [Optional]
9797
* This list of 4-byte values stores corrected commit date offsets for the
9898
commits, arranged in the same order as commit data chunk.
9999
* If the corrected commit date offset cannot be stored within 31 bits,
@@ -104,7 +104,7 @@ CHUNK DATA:
104104
by compatible versions of Git and in case of split commit-graph chains,
105105
the topmost layer also has Generation Data chunk.
106106

107-
Generation Data Overflow (ID: {'G', 'D', 'O', 'V' }) [Optional]
107+
Generation Data Overflow (ID: {'G', 'D', 'O', '2' }) [Optional]
108108
* This list of 8-byte values stores the corrected commit date offsets
109109
for commits with corrected commit date offsets that cannot be
110110
stored within 31 bits.
@@ -156,3 +156,11 @@ CHUNK DATA:
156156
TRAILER:
157157

158158
H-byte HASH-checksum of all of the above.
159+
160+
== Historical Notes:
161+
162+
The Generation Data (GDA2) and Generation Data Overflow (GDO2) chunks have
163+
the number '2' in their chunk IDs because a previous version of Git wrote
164+
possibly erroneous data in these chunks with the IDs "GDAT" and "GDOV". By
165+
changing the IDs, newer versions of Git will silently ignore those older
166+
chunks and write the new information without trusting the incorrect data.

commit-graph.c

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,8 @@ void git_test_write_commit_graph_or_die(void)
3939
#define GRAPH_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */
4040
#define GRAPH_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */
4141
#define GRAPH_CHUNKID_DATA 0x43444154 /* "CDAT" */
42-
#define GRAPH_CHUNKID_GENERATION_DATA 0x47444154 /* "GDAT" */
43-
#define GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW 0x47444f56 /* "GDOV" */
42+
#define GRAPH_CHUNKID_GENERATION_DATA 0x47444132 /* "GDA2" */
43+
#define GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW 0x47444f32 /* "GDO2" */
4444
#define GRAPH_CHUNKID_EXTRAEDGES 0x45444745 /* "EDGE" */
4545
#define GRAPH_CHUNKID_BLOOMINDEXES 0x42494458 /* "BIDX" */
4646
#define GRAPH_CHUNKID_BLOOMDATA 0x42444154 /* "BDAT" */
@@ -407,6 +407,9 @@ struct commit_graph *parse_commit_graph(struct repository *r,
407407
&graph->chunk_generation_data);
408408
pair_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW,
409409
&graph->chunk_generation_data_overflow);
410+
411+
if (graph->chunk_generation_data)
412+
graph->read_generation_data = 1;
410413
}
411414

412415
if (r->settings.commit_graph_read_changed_paths) {
@@ -803,7 +806,7 @@ static void fill_commit_graph_info(struct commit *item, struct commit_graph *g,
803806
die(_("commit-graph requires overflow generation data but has none"));
804807

805808
offset_pos = offset ^ CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW;
806-
graph_data->generation = get_be64(g->chunk_generation_data_overflow + 8 * offset_pos);
809+
graph_data->generation = item->date + get_be64(g->chunk_generation_data_overflow + 8 * offset_pos);
807810
} else
808811
graph_data->generation = item->date + offset;
809812
} else
@@ -1556,12 +1559,16 @@ static void compute_generation_numbers(struct write_commit_graph_context *ctx)
15561559
if (current->date && current->date > max_corrected_commit_date)
15571560
max_corrected_commit_date = current->date - 1;
15581561
commit_graph_data_at(current)->generation = max_corrected_commit_date + 1;
1559-
1560-
if (commit_graph_data_at(current)->generation - current->date > GENERATION_NUMBER_V2_OFFSET_MAX)
1561-
ctx->num_generation_data_overflows++;
15621562
}
15631563
}
15641564
}
1565+
1566+
for (i = 0; i < ctx->commits.nr; i++) {
1567+
struct commit *c = ctx->commits.list[i];
1568+
timestamp_t offset = commit_graph_data_at(c)->generation - c->date;
1569+
if (offset > GENERATION_NUMBER_V2_OFFSET_MAX)
1570+
ctx->num_generation_data_overflows++;
1571+
}
15651572
stop_progress(&ctx->progress);
15661573
}
15671574

t/helper/test-read-graph.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include "commit-graph.h"
44
#include "repository.h"
55
#include "object-store.h"
6+
#include "bloom.h"
67

78
int cmd__read_graph(int argc, const char **argv)
89
{
@@ -45,6 +46,18 @@ int cmd__read_graph(int argc, const char **argv)
4546
printf(" bloom_data");
4647
printf("\n");
4748

49+
printf("options:");
50+
if (graph->bloom_filter_settings)
51+
printf(" bloom(%"PRIu32",%"PRIu32",%"PRIu32")",
52+
graph->bloom_filter_settings->hash_version,
53+
graph->bloom_filter_settings->bits_per_entry,
54+
graph->bloom_filter_settings->num_hashes);
55+
if (graph->read_generation_data)
56+
printf(" read_generation_data");
57+
if (graph->topo_levels)
58+
printf(" topo_levels");
59+
printf("\n");
60+
4861
UNLEAK(graph);
4962

5063
return 0;

t/lib-commit-graph.sh

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
#!/bin/sh
2+
3+
# Helper functions for testing commit-graphs.
4+
5+
# Initialize OID cache with oid_version
6+
test_oid_cache <<-EOF
7+
oid_version sha1:1
8+
oid_version sha256:2
9+
EOF
10+
11+
graph_git_two_modes() {
12+
git -c core.commitGraph=true $1 >output &&
13+
git -c core.commitGraph=false $1 >expect &&
14+
test_cmp expect output
15+
}
16+
17+
graph_git_behavior() {
18+
MSG=$1
19+
DIR=$2
20+
BRANCH=$3
21+
COMPARE=$4
22+
test_expect_success "check normal git operations: $MSG" '
23+
cd "$TRASH_DIRECTORY/$DIR" &&
24+
graph_git_two_modes "log --oneline $BRANCH" &&
25+
graph_git_two_modes "log --topo-order $BRANCH" &&
26+
graph_git_two_modes "log --graph $COMPARE..$BRANCH" &&
27+
graph_git_two_modes "branch -vv" &&
28+
graph_git_two_modes "merge-base -a $BRANCH $COMPARE"
29+
'
30+
}
31+
32+
graph_read_expect() {
33+
OPTIONAL=""
34+
NUM_CHUNKS=3
35+
if test -n "$2"
36+
then
37+
OPTIONAL=" $2"
38+
NUM_CHUNKS=$((3 + $(echo "$2" | wc -w)))
39+
fi
40+
GENERATION_VERSION=2
41+
if test -n "$3"
42+
then
43+
GENERATION_VERSION=$3
44+
fi
45+
OPTIONS=
46+
if test $GENERATION_VERSION -gt 1
47+
then
48+
OPTIONS=" read_generation_data"
49+
fi
50+
cat >expect <<- EOF
51+
header: 43475048 1 $(test_oid oid_version) $NUM_CHUNKS 0
52+
num_commits: $1
53+
chunks: oid_fanout oid_lookup commit_metadata$OPTIONAL
54+
options:$OPTIONS
55+
EOF
56+
test-tool read-graph >output &&
57+
test_cmp expect output
58+
}

t/t4216-log-bloom.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ graph_read_expect () {
4848
header: 43475048 1 $(test_oid oid_version) $NUM_CHUNKS 0
4949
num_commits: $1
5050
chunks: oid_fanout oid_lookup commit_metadata generation_data bloom_indexes bloom_data
51+
options: bloom(1,10,7) read_generation_data
5152
EOF
5253
test-tool read-graph >actual &&
5354
test_cmp expect actual

t/t5318-commit-graph.sh

Lines changed: 5 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,7 @@ test_expect_success 'setup full repo' '
2929
cd "$TRASH_DIRECTORY/full" &&
3030
git init &&
3131
git config core.commitGraph true &&
32-
objdir=".git/objects" &&
33-
34-
test_oid_cache <<-EOF
35-
oid_version sha1:1
36-
oid_version sha256:2
37-
EOF
32+
objdir=".git/objects"
3833
'
3934

4035
test_expect_success POSIXPERM 'tweak umask for modebit tests' '
@@ -69,46 +64,10 @@ test_expect_success 'create commits and repack' '
6964
git repack
7065
'
7166

72-
graph_git_two_modes() {
73-
git -c core.commitGraph=true $1 >output &&
74-
git -c core.commitGraph=false $1 >expect &&
75-
test_cmp expect output
76-
}
77-
78-
graph_git_behavior() {
79-
MSG=$1
80-
DIR=$2
81-
BRANCH=$3
82-
COMPARE=$4
83-
test_expect_success "check normal git operations: $MSG" '
84-
cd "$TRASH_DIRECTORY/$DIR" &&
85-
graph_git_two_modes "log --oneline $BRANCH" &&
86-
graph_git_two_modes "log --topo-order $BRANCH" &&
87-
graph_git_two_modes "log --graph $COMPARE..$BRANCH" &&
88-
graph_git_two_modes "branch -vv" &&
89-
graph_git_two_modes "merge-base -a $BRANCH $COMPARE"
90-
'
91-
}
67+
. "$TEST_DIRECTORY"/lib-commit-graph.sh
9268

9369
graph_git_behavior 'no graph' full commits/3 commits/1
9470

95-
graph_read_expect() {
96-
OPTIONAL=""
97-
NUM_CHUNKS=3
98-
if test ! -z "$2"
99-
then
100-
OPTIONAL=" $2"
101-
NUM_CHUNKS=$((3 + $(echo "$2" | wc -w)))
102-
fi
103-
cat >expect <<- EOF
104-
header: 43475048 1 $(test_oid oid_version) $NUM_CHUNKS 0
105-
num_commits: $1
106-
chunks: oid_fanout oid_lookup commit_metadata$OPTIONAL
107-
EOF
108-
test-tool read-graph >output &&
109-
test_cmp expect output
110-
}
111-
11271
test_expect_success 'exit with correct error on bad input to --stdin-commits' '
11372
cd "$TRASH_DIRECTORY/full" &&
11473
# invalid, non-hex OID
@@ -466,10 +425,10 @@ test_expect_success 'warn on improper hash version' '
466425
)
467426
'
468427

469-
test_expect_success 'lower layers have overflow chunk' '
428+
test_expect_success TIME_IS_64BIT,TIME_T_IS_64BIT 'lower layers have overflow chunk' '
470429
cd "$TRASH_DIRECTORY/full" &&
471430
UNIX_EPOCH_ZERO="@0 +0000" &&
472-
FUTURE_DATE="@2147483646 +0000" &&
431+
FUTURE_DATE="@4147483646 +0000" &&
473432
rm -f .git/objects/info/commit-graph &&
474433
test_commit --date "$FUTURE_DATE" future-1 &&
475434
test_commit --date "$UNIX_EPOCH_ZERO" old-1 &&
@@ -497,7 +456,7 @@ test_expect_success 'git commit-graph verify' '
497456
cd "$TRASH_DIRECTORY/full" &&
498457
git rev-parse commits/8 | git -c commitGraph.generationVersion=1 commit-graph write --stdin-commits &&
499458
git commit-graph verify >output &&
500-
graph_read_expect 9 extra_edges
459+
graph_read_expect 9 extra_edges 1
501460
'
502461

503462
NUM_COMMITS=9
@@ -825,10 +784,6 @@ test_expect_success 'set up and verify repo with generation data overflow chunk'
825784
objdir=".git/objects" &&
826785
UNIX_EPOCH_ZERO="@0 +0000" &&
827786
FUTURE_DATE="@2147483646 +0000" &&
828-
test_oid_cache <<-EOF &&
829-
oid_version sha1:1
830-
oid_version sha256:2
831-
EOF
832787
cd "$TRASH_DIRECTORY" &&
833788
mkdir repo &&
834789
cd repo &&

t/t5324-split-commit-graph.sh

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,16 @@ graph_read_expect() {
3030
then
3131
NUM_BASE=$2
3232
fi
33+
OPTIONS=
34+
if test -z "$3"
35+
then
36+
OPTIONS=" read_generation_data"
37+
fi
3338
cat >expect <<- EOF
3439
header: 43475048 1 $(test_oid oid_version) 4 $NUM_BASE
3540
num_commits: $1
3641
chunks: oid_fanout oid_lookup commit_metadata generation_data
42+
options:$OPTIONS
3743
EOF
3844
test-tool read-graph >output &&
3945
test_cmp expect output
@@ -508,6 +514,7 @@ test_expect_success 'setup repo for mixed generation commit-graph-chain' '
508514
header: 43475048 1 $(test_oid oid_version) 4 1
509515
num_commits: $NUM_SECOND_LAYER_COMMITS
510516
chunks: oid_fanout oid_lookup commit_metadata
517+
options:
511518
EOF
512519
test_cmp expect output &&
513520
git commit-graph verify &&
@@ -540,6 +547,7 @@ test_expect_success 'do not write generation data chunk if not present on existi
540547
header: 43475048 1 $(test_oid oid_version) 4 2
541548
num_commits: $NUM_THIRD_LAYER_COMMITS
542549
chunks: oid_fanout oid_lookup commit_metadata
550+
options:
543551
EOF
544552
test_cmp expect output &&
545553
git commit-graph verify
@@ -581,6 +589,7 @@ test_expect_success 'do not write generation data chunk if the topmost remaining
581589
header: 43475048 1 $(test_oid oid_version) 4 2
582590
num_commits: $(($NUM_THIRD_LAYER_COMMITS + $NUM_FOURTH_LAYER_COMMITS))
583591
chunks: oid_fanout oid_lookup commit_metadata
592+
options:
584593
EOF
585594
test_cmp expect output &&
586595
git commit-graph verify
@@ -620,6 +629,7 @@ test_expect_success 'write generation data chunk if topmost remaining layer has
620629
header: 43475048 1 $(test_oid oid_version) 5 1
621630
num_commits: $(($NUM_SECOND_LAYER_COMMITS + $NUM_THIRD_LAYER_COMMITS + $NUM_FOURTH_LAYER_COMMITS + $NUM_FIFTH_LAYER_COMMITS))
622631
chunks: oid_fanout oid_lookup commit_metadata generation_data
632+
options: read_generation_data
623633
EOF
624634
test_cmp expect output
625635
)

t/t5328-commit-graph-64bit-time.sh

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
#!/bin/sh
2+
3+
test_description='commit graph with 64-bit timestamps'
4+
. ./test-lib.sh
5+
6+
if ! test_have_prereq TIME_IS_64BIT || ! test_have_prereq TIME_T_IS_64BIT
7+
then
8+
skip_all='skipping 64-bit timestamp tests'
9+
test_done
10+
fi
11+
12+
. "$TEST_DIRECTORY"/lib-commit-graph.sh
13+
14+
UNIX_EPOCH_ZERO="@0 +0000"
15+
FUTURE_DATE="@4147483646 +0000"
16+
17+
GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=0
18+
19+
test_expect_success 'lower layers have overflow chunk' '
20+
rm -f .git/objects/info/commit-graph &&
21+
test_commit --date "$FUTURE_DATE" future-1 &&
22+
test_commit --date "$UNIX_EPOCH_ZERO" old-1 &&
23+
git commit-graph write --reachable &&
24+
test_commit --date "$FUTURE_DATE" future-2 &&
25+
test_commit --date "$UNIX_EPOCH_ZERO" old-2 &&
26+
git commit-graph write --reachable --split=no-merge &&
27+
test_commit extra &&
28+
git commit-graph write --reachable --split=no-merge &&
29+
git commit-graph write --reachable &&
30+
graph_read_expect 5 "generation_data generation_data_overflow" &&
31+
mv .git/objects/info/commit-graph commit-graph-upgraded &&
32+
git commit-graph write --reachable &&
33+
graph_read_expect 5 "generation_data generation_data_overflow" &&
34+
test_cmp .git/objects/info/commit-graph commit-graph-upgraded
35+
'
36+
37+
graph_git_behavior 'overflow' '' HEAD~2 HEAD
38+
39+
test_expect_success 'set up and verify repo with generation data overflow chunk' '
40+
mkdir repo &&
41+
cd repo &&
42+
git init &&
43+
test_commit --date "$UNIX_EPOCH_ZERO" 1 &&
44+
test_commit 2 &&
45+
test_commit --date "$UNIX_EPOCH_ZERO" 3 &&
46+
git commit-graph write --reachable &&
47+
graph_read_expect 3 generation_data &&
48+
test_commit --date "$FUTURE_DATE" 4 &&
49+
test_commit 5 &&
50+
test_commit --date "$UNIX_EPOCH_ZERO" 6 &&
51+
git branch left &&
52+
git reset --hard 3 &&
53+
test_commit 7 &&
54+
test_commit --date "$FUTURE_DATE" 8 &&
55+
test_commit 9 &&
56+
git branch right &&
57+
git reset --hard 3 &&
58+
test_merge M left right &&
59+
git commit-graph write --reachable &&
60+
graph_read_expect 10 "generation_data generation_data_overflow" &&
61+
git commit-graph verify
62+
'
63+
64+
graph_git_behavior 'overflow 2' repo left right
65+
66+
test_done

0 commit comments

Comments
 (0)