Skip to content

Commit 1fdc383

Browse files
abhishekkumar2718gitster
authored andcommitted
commit-graph: use generation v2 only if entire chain does
Since there are released versions of Git that understand generation numbers in the commit-graph's CDAT chunk but do not understand the GDAT chunk, the following scenario is possible: 1. "New" Git writes a commit-graph with the GDAT chunk. 2. "Old" Git writes a split commit-graph on top without a GDAT chunk. If each layer of split commit-graph is treated independently, as it was the case before this commit, with Git inspecting only the current layer for chunk_generation_data pointer, commits in the lower layer (one with GDAT) whould have corrected commit date as their generation number, while commits in the upper layer would have topological levels as their generation. Corrected commit dates usually have much larger values than topological levels. This means that if we take two commits, one from the upper layer, and one reachable from it in the lower layer, then the expectation that the generation of a parent is smaller than the generation of a child would be violated. It is difficult to expose this issue in a test. Since we _start_ with artificially low generation numbers, any commit walk that prioritizes generation numbers will walk all of the commits with high generation number before walking the commits with low generation number. In all the cases I tried, the commit-graph layers themselves "protect" any incorrect behavior since none of the commits in the lower layer can reach the commits in the upper layer. This issue would manifest itself as a performance problem in this case, especially with something like "git log --graph" since the low generation numbers would cause the in-degree queue to walk all of the commits in the lower layer before allowing the topo-order queue to write anything to output (depending on the size of the upper layer). Therefore, When writing the new layer in split commit-graph, we write a GDAT chunk only if the topmost layer has a GDAT chunk. This guarantees that if a layer has GDAT chunk, all lower layers must have a GDAT chunk as well. Rewriting layers follows similar approach: if the topmost layer below the set of layers being rewritten (in the split commit-graph chain) exists, and it does not contain GDAT chunk, then the result of rewrite does not have GDAT chunks either. Signed-off-by: Derrick Stolee <[email protected]> Signed-off-by: Abhishek Kumar <[email protected]> Reviewed-by: Taylor Blau <[email protected]> Reviewed-by: Derrick Stolee <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent e8b6300 commit 1fdc383

File tree

3 files changed

+210
-2
lines changed

3 files changed

+210
-2
lines changed

commit-graph.c

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -614,6 +614,21 @@ static struct commit_graph *load_commit_graph_chain(struct repository *r,
614614
return graph_chain;
615615
}
616616

617+
static void validate_mixed_generation_chain(struct commit_graph *g)
618+
{
619+
int read_generation_data;
620+
621+
if (!g)
622+
return;
623+
624+
read_generation_data = !!g->chunk_generation_data;
625+
626+
while (g) {
627+
g->read_generation_data = read_generation_data;
628+
g = g->base_graph;
629+
}
630+
}
631+
617632
struct commit_graph *read_commit_graph_one(struct repository *r,
618633
struct object_directory *odb)
619634
{
@@ -622,6 +637,8 @@ struct commit_graph *read_commit_graph_one(struct repository *r,
622637
if (!g)
623638
g = load_commit_graph_chain(r, odb);
624639

640+
validate_mixed_generation_chain(g);
641+
625642
return g;
626643
}
627644

@@ -791,7 +808,7 @@ static void fill_commit_graph_info(struct commit *item, struct commit_graph *g,
791808
date_low = get_be32(commit_data + g->hash_len + 12);
792809
item->date = (timestamp_t)((date_high << 32) | date_low);
793810

794-
if (g->chunk_generation_data) {
811+
if (g->read_generation_data) {
795812
offset = (timestamp_t)get_be32(g->chunk_generation_data + sizeof(uint32_t) * lex_index);
796813

797814
if (offset & CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW) {
@@ -2019,6 +2036,13 @@ static void split_graph_merge_strategy(struct write_commit_graph_context *ctx)
20192036
if (i < ctx->num_commit_graphs_after)
20202037
ctx->commit_graph_hash_after[i] = xstrdup(oid_to_hex(&g->oid));
20212038

2039+
/*
2040+
* If the topmost remaining layer has generation data chunk, the
2041+
* resultant layer also has generation data chunk.
2042+
*/
2043+
if (i == ctx->num_commit_graphs_after - 2)
2044+
ctx->write_generation_data = !!g->chunk_generation_data;
2045+
20222046
i--;
20232047
g = g->base_graph;
20242048
}
@@ -2343,6 +2367,8 @@ int write_commit_graph(struct object_directory *odb,
23432367
} else
23442368
ctx->num_commit_graphs_after = 1;
23452369

2370+
validate_mixed_generation_chain(ctx->r->objects->commit_graph);
2371+
23462372
compute_generation_numbers(ctx);
23472373

23482374
if (ctx->changed_paths)
@@ -2541,7 +2567,7 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags)
25412567
* also GENERATION_NUMBER_V1_MAX. Decrement to avoid extra logic
25422568
* in the following condition.
25432569
*/
2544-
if (!g->chunk_generation_data && max_generation == GENERATION_NUMBER_V1_MAX)
2570+
if (!g->read_generation_data && max_generation == GENERATION_NUMBER_V1_MAX)
25452571
max_generation--;
25462572

25472573
generation = commit_graph_generation(graph_commit);

commit-graph.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ struct commit_graph {
6464
struct object_directory *odb;
6565

6666
uint32_t num_commits_in_base;
67+
unsigned int read_generation_data;
6768
struct commit_graph *base_graph;
6869

6970
const uint32_t *chunk_oid_fanout;

t/t5324-split-commit-graph.sh

Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -453,4 +453,185 @@ test_expect_success 'prevent regression for duplicate commits across layers' '
453453
git -C dup commit-graph verify
454454
'
455455

456+
NUM_FIRST_LAYER_COMMITS=64
457+
NUM_SECOND_LAYER_COMMITS=16
458+
NUM_THIRD_LAYER_COMMITS=7
459+
NUM_FOURTH_LAYER_COMMITS=8
460+
NUM_FIFTH_LAYER_COMMITS=16
461+
SECOND_LAYER_SEQUENCE_START=$(($NUM_FIRST_LAYER_COMMITS + 1))
462+
SECOND_LAYER_SEQUENCE_END=$(($SECOND_LAYER_SEQUENCE_START + $NUM_SECOND_LAYER_COMMITS - 1))
463+
THIRD_LAYER_SEQUENCE_START=$(($SECOND_LAYER_SEQUENCE_END + 1))
464+
THIRD_LAYER_SEQUENCE_END=$(($THIRD_LAYER_SEQUENCE_START + $NUM_THIRD_LAYER_COMMITS - 1))
465+
FOURTH_LAYER_SEQUENCE_START=$(($THIRD_LAYER_SEQUENCE_END + 1))
466+
FOURTH_LAYER_SEQUENCE_END=$(($FOURTH_LAYER_SEQUENCE_START + $NUM_FOURTH_LAYER_COMMITS - 1))
467+
FIFTH_LAYER_SEQUENCE_START=$(($FOURTH_LAYER_SEQUENCE_END + 1))
468+
FIFTH_LAYER_SEQUENCE_END=$(($FIFTH_LAYER_SEQUENCE_START + $NUM_FIFTH_LAYER_COMMITS - 1))
469+
470+
# Current split graph chain:
471+
#
472+
# 16 commits (No GDAT)
473+
# ------------------------
474+
# 64 commits (GDAT)
475+
#
476+
test_expect_success 'setup repo for mixed generation commit-graph-chain' '
477+
graphdir=".git/objects/info/commit-graphs" &&
478+
test_oid_cache <<-EOF &&
479+
oid_version sha1:1
480+
oid_version sha256:2
481+
EOF
482+
git init mixed &&
483+
(
484+
cd mixed &&
485+
git config core.commitGraph true &&
486+
git config gc.writeCommitGraph false &&
487+
for i in $(test_seq $NUM_FIRST_LAYER_COMMITS)
488+
do
489+
test_commit $i &&
490+
git branch commits/$i || return 1
491+
done &&
492+
git commit-graph write --reachable --split &&
493+
graph_read_expect $NUM_FIRST_LAYER_COMMITS &&
494+
test_line_count = 1 $graphdir/commit-graph-chain &&
495+
for i in $(test_seq $SECOND_LAYER_SEQUENCE_START $SECOND_LAYER_SEQUENCE_END)
496+
do
497+
test_commit $i &&
498+
git branch commits/$i || return 1
499+
done &&
500+
GIT_TEST_COMMIT_GRAPH_NO_GDAT=1 git commit-graph write --reachable --split=no-merge &&
501+
test_line_count = 2 $graphdir/commit-graph-chain &&
502+
test-tool read-graph >output &&
503+
cat >expect <<-EOF &&
504+
header: 43475048 1 $(test_oid oid_version) 4 1
505+
num_commits: $NUM_SECOND_LAYER_COMMITS
506+
chunks: oid_fanout oid_lookup commit_metadata
507+
EOF
508+
test_cmp expect output &&
509+
git commit-graph verify &&
510+
cat $graphdir/commit-graph-chain
511+
)
512+
'
513+
514+
# The new layer will be added without generation data chunk as it was not
515+
# present on the layer underneath it.
516+
#
517+
# 7 commits (No GDAT)
518+
# ------------------------
519+
# 16 commits (No GDAT)
520+
# ------------------------
521+
# 64 commits (GDAT)
522+
#
523+
test_expect_success 'do not write generation data chunk if not present on existing tip' '
524+
git clone mixed mixed-no-gdat &&
525+
(
526+
cd mixed-no-gdat &&
527+
for i in $(test_seq $THIRD_LAYER_SEQUENCE_START $THIRD_LAYER_SEQUENCE_END)
528+
do
529+
test_commit $i &&
530+
git branch commits/$i || return 1
531+
done &&
532+
git commit-graph write --reachable --split=no-merge &&
533+
test_line_count = 3 $graphdir/commit-graph-chain &&
534+
test-tool read-graph >output &&
535+
cat >expect <<-EOF &&
536+
header: 43475048 1 $(test_oid oid_version) 4 2
537+
num_commits: $NUM_THIRD_LAYER_COMMITS
538+
chunks: oid_fanout oid_lookup commit_metadata
539+
EOF
540+
test_cmp expect output &&
541+
git commit-graph verify
542+
)
543+
'
544+
545+
# Number of commits in each layer of the split-commit graph before merge:
546+
#
547+
# 8 commits (No GDAT)
548+
# ------------------------
549+
# 7 commits (No GDAT)
550+
# ------------------------
551+
# 16 commits (No GDAT)
552+
# ------------------------
553+
# 64 commits (GDAT)
554+
#
555+
# The top two layers are merged and do not have generation data chunk as layer below them does
556+
# not have generation data chunk.
557+
#
558+
# 15 commits (No GDAT)
559+
# ------------------------
560+
# 16 commits (No GDAT)
561+
# ------------------------
562+
# 64 commits (GDAT)
563+
#
564+
test_expect_success 'do not write generation data chunk if the topmost remaining layer does not have generation data chunk' '
565+
git clone mixed-no-gdat mixed-merge-no-gdat &&
566+
(
567+
cd mixed-merge-no-gdat &&
568+
for i in $(test_seq $FOURTH_LAYER_SEQUENCE_START $FOURTH_LAYER_SEQUENCE_END)
569+
do
570+
test_commit $i &&
571+
git branch commits/$i || return 1
572+
done &&
573+
git commit-graph write --reachable --split --size-multiple 1 &&
574+
test_line_count = 3 $graphdir/commit-graph-chain &&
575+
test-tool read-graph >output &&
576+
cat >expect <<-EOF &&
577+
header: 43475048 1 $(test_oid oid_version) 4 2
578+
num_commits: $(($NUM_THIRD_LAYER_COMMITS + $NUM_FOURTH_LAYER_COMMITS))
579+
chunks: oid_fanout oid_lookup commit_metadata
580+
EOF
581+
test_cmp expect output &&
582+
git commit-graph verify
583+
)
584+
'
585+
586+
# Number of commits in each layer of the split-commit graph before merge:
587+
#
588+
# 16 commits (No GDAT)
589+
# ------------------------
590+
# 15 commits (No GDAT)
591+
# ------------------------
592+
# 16 commits (No GDAT)
593+
# ------------------------
594+
# 64 commits (GDAT)
595+
#
596+
# The top three layers are merged and has generation data chunk as the topmost remaining layer
597+
# has generation data chunk.
598+
#
599+
# 47 commits (GDAT)
600+
# ------------------------
601+
# 64 commits (GDAT)
602+
#
603+
test_expect_success 'write generation data chunk if topmost remaining layer has generation data chunk' '
604+
git clone mixed-merge-no-gdat mixed-merge-gdat &&
605+
(
606+
cd mixed-merge-gdat &&
607+
for i in $(test_seq $FIFTH_LAYER_SEQUENCE_START $FIFTH_LAYER_SEQUENCE_END)
608+
do
609+
test_commit $i &&
610+
git branch commits/$i || return 1
611+
done &&
612+
git commit-graph write --reachable --split --size-multiple 1 &&
613+
test_line_count = 2 $graphdir/commit-graph-chain &&
614+
test-tool read-graph >output &&
615+
cat >expect <<-EOF &&
616+
header: 43475048 1 $(test_oid oid_version) 5 1
617+
num_commits: $(($NUM_SECOND_LAYER_COMMITS + $NUM_THIRD_LAYER_COMMITS + $NUM_FOURTH_LAYER_COMMITS + $NUM_FIFTH_LAYER_COMMITS))
618+
chunks: oid_fanout oid_lookup commit_metadata generation_data
619+
EOF
620+
test_cmp expect output
621+
)
622+
'
623+
624+
test_expect_success 'write generation data chunk when commit-graph chain is replaced' '
625+
git clone mixed mixed-replace &&
626+
(
627+
cd mixed-replace &&
628+
git commit-graph write --reachable --split=replace &&
629+
test_path_is_file $graphdir/commit-graph-chain &&
630+
test_line_count = 1 $graphdir/commit-graph-chain &&
631+
verify_chain_files_exist $graphdir &&
632+
graph_read_expect $(($NUM_FIRST_LAYER_COMMITS + $NUM_SECOND_LAYER_COMMITS)) &&
633+
git commit-graph verify
634+
)
635+
'
636+
456637
test_done

0 commit comments

Comments
 (0)