Skip to content

Commit e8b6300

Browse files
abhishekkumar2718gitster
authored andcommitted
commit-graph: implement generation data chunk
As discovered by Ævar, we cannot increment graph version to distinguish between generation numbers v1 and v2 [1]. Thus, one of the prerequisites before implementing generation number v2 was to distinguish between graph versions in a backwards compatible manner. We are going to introduce a new chunk called Generation DATa chunk (or GDAT). GDAT will store corrected committer date offsets whereas CDAT will still store topological level. Old Git does not understand GDAT chunk and would ignore it, reading topological levels from CDAT. New Git can parse GDAT and take advantage of newer generation numbers, falling back to topological levels when GDAT chunk is missing (as it would happen with a commit-graph written by old Git). We introduce a test environment variable 'GIT_TEST_COMMIT_GRAPH_NO_GDAT' which forces commit-graph file to be written without generation data chunk to emulate a commit-graph file written by old Git. To minimize the space required to store corrected commit date, Git stores corrected commit date offsets into the commit-graph file, instead of corrected commit dates. This saves us 4 bytes per commit, decreasing the GDAT chunk size by half, but it's possible for the offset to overflow the 4-bytes allocated for storage. As such overflows are and should be exceedingly rare, we use the following overflow management scheme: We introduce a new commit-graph chunk, Generation Data OVerflow ('GDOV') to store corrected commit dates for commits with offsets greater than GENERATION_NUMBER_V2_OFFSET_MAX. If the offset is greater than GENERATION_NUMBER_V2_OFFSET_MAX, we set the MSB of the offset and the other bits store the position of corrected commit date in GDOV chunk, similar to how Extra Edge List is maintained. 
We test the overflow-related code with the following repo history:

          F - N - U
         /         \
U - N - U           N
         \         /
          N - F - N

Where the commits denoted by U have committer date of zero seconds since Unix epoch, the commits denoted by N have committer date of 1112354055 (default committer date for the test suite) seconds since Unix epoch and the commits denoted by F have committer date of (2 ^ 31 - 2) seconds since Unix epoch. The largest offset observed is 2 ^ 31, just large enough to overflow. [1]: https://lore.kernel.org/git/[email protected]/ Signed-off-by: Abhishek Kumar <[email protected]> Reviewed-by: Taylor Blau <[email protected]> Reviewed-by: Derrick Stolee <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent c1a0911 commit e8b6300

10 files changed

+200
-32
lines changed

commit-graph.c

Lines changed: 103 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,13 @@ void git_test_write_commit_graph_or_die(void)
3838
#define GRAPH_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */
3939
#define GRAPH_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */
4040
#define GRAPH_CHUNKID_DATA 0x43444154 /* "CDAT" */
41+
#define GRAPH_CHUNKID_GENERATION_DATA 0x47444154 /* "GDAT" */
42+
#define GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW 0x47444f56 /* "GDOV" */
4143
#define GRAPH_CHUNKID_EXTRAEDGES 0x45444745 /* "EDGE" */
4244
#define GRAPH_CHUNKID_BLOOMINDEXES 0x42494458 /* "BIDX" */
4345
#define GRAPH_CHUNKID_BLOOMDATA 0x42444154 /* "BDAT" */
4446
#define GRAPH_CHUNKID_BASE 0x42415345 /* "BASE" */
45-
#define MAX_NUM_CHUNKS 7
47+
#define MAX_NUM_CHUNKS 9
4648

4749
#define GRAPH_DATA_WIDTH (the_hash_algo->rawsz + 16)
4850

@@ -61,6 +63,8 @@ void git_test_write_commit_graph_or_die(void)
6163
#define GRAPH_MIN_SIZE (GRAPH_HEADER_SIZE + 4 * GRAPH_CHUNKLOOKUP_WIDTH \
6264
+ GRAPH_FANOUT_SIZE + the_hash_algo->rawsz)
6365

66+
#define CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW (1ULL << 31)
67+
6468
/* Remember to update object flag allocation in object.h */
6569
#define REACHABLE (1u<<15)
6670

@@ -394,6 +398,20 @@ struct commit_graph *parse_commit_graph(struct repository *r,
394398
graph->chunk_commit_data = data + chunk_offset;
395399
break;
396400

401+
case GRAPH_CHUNKID_GENERATION_DATA:
402+
if (graph->chunk_generation_data)
403+
chunk_repeated = 1;
404+
else
405+
graph->chunk_generation_data = data + chunk_offset;
406+
break;
407+
408+
case GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW:
409+
if (graph->chunk_generation_data_overflow)
410+
chunk_repeated = 1;
411+
else
412+
graph->chunk_generation_data_overflow = data + chunk_offset;
413+
break;
414+
397415
case GRAPH_CHUNKID_EXTRAEDGES:
398416
if (graph->chunk_extra_edges)
399417
chunk_repeated = 1;
@@ -754,8 +772,8 @@ static void fill_commit_graph_info(struct commit *item, struct commit_graph *g,
754772
{
755773
const unsigned char *commit_data;
756774
struct commit_graph_data *graph_data;
757-
uint32_t lex_index;
758-
uint64_t date_high, date_low;
775+
uint32_t lex_index, offset_pos;
776+
uint64_t date_high, date_low, offset;
759777

760778
while (pos < g->num_commits_in_base)
761779
g = g->base_graph;
@@ -773,7 +791,19 @@ static void fill_commit_graph_info(struct commit *item, struct commit_graph *g,
773791
date_low = get_be32(commit_data + g->hash_len + 12);
774792
item->date = (timestamp_t)((date_high << 32) | date_low);
775793

776-
graph_data->generation = get_be32(commit_data + g->hash_len + 8) >> 2;
794+
if (g->chunk_generation_data) {
795+
offset = (timestamp_t)get_be32(g->chunk_generation_data + sizeof(uint32_t) * lex_index);
796+
797+
if (offset & CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW) {
798+
if (!g->chunk_generation_data_overflow)
799+
die(_("commit-graph requires overflow generation data but has none"));
800+
801+
offset_pos = offset ^ CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW;
802+
graph_data->generation = get_be64(g->chunk_generation_data_overflow + 8 * offset_pos);
803+
} else
804+
graph_data->generation = item->date + offset;
805+
} else
806+
graph_data->generation = get_be32(commit_data + g->hash_len + 8) >> 2;
777807

778808
if (g->topo_levels)
779809
*topo_level_slab_at(g->topo_levels, item) = get_be32(commit_data + g->hash_len + 8) >> 2;
@@ -945,6 +975,7 @@ struct write_commit_graph_context {
945975
struct oid_array oids;
946976
struct packed_commit_list commits;
947977
int num_extra_edges;
978+
int num_generation_data_overflows;
948979
unsigned long approx_nr_objects;
949980
struct progress *progress;
950981
int progress_done;
@@ -963,7 +994,8 @@ struct write_commit_graph_context {
963994
report_progress:1,
964995
split:1,
965996
changed_paths:1,
966-
order_by_pack:1;
997+
order_by_pack:1,
998+
write_generation_data:1;
967999

9681000
struct topo_level_slab *topo_levels;
9691001
const struct commit_graph_opts *opts;
@@ -1123,6 +1155,45 @@ static int write_graph_chunk_data(struct hashfile *f,
11231155
return 0;
11241156
}
11251157

1158+
static int write_graph_chunk_generation_data(struct hashfile *f,
1159+
struct write_commit_graph_context *ctx)
1160+
{
1161+
int i, num_generation_data_overflows = 0;
1162+
1163+
for (i = 0; i < ctx->commits.nr; i++) {
1164+
struct commit *c = ctx->commits.list[i];
1165+
timestamp_t offset = commit_graph_data_at(c)->generation - c->date;
1166+
display_progress(ctx->progress, ++ctx->progress_cnt);
1167+
1168+
if (offset > GENERATION_NUMBER_V2_OFFSET_MAX) {
1169+
offset = CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW | num_generation_data_overflows;
1170+
num_generation_data_overflows++;
1171+
}
1172+
1173+
hashwrite_be32(f, offset);
1174+
}
1175+
1176+
return 0;
1177+
}
1178+
1179+
static int write_graph_chunk_generation_data_overflow(struct hashfile *f,
1180+
struct write_commit_graph_context *ctx)
1181+
{
1182+
int i;
1183+
for (i = 0; i < ctx->commits.nr; i++) {
1184+
struct commit *c = ctx->commits.list[i];
1185+
timestamp_t offset = commit_graph_data_at(c)->generation - c->date;
1186+
display_progress(ctx->progress, ++ctx->progress_cnt);
1187+
1188+
if (offset > GENERATION_NUMBER_V2_OFFSET_MAX) {
1189+
hashwrite_be32(f, offset >> 32);
1190+
hashwrite_be32(f, (uint32_t) offset);
1191+
}
1192+
}
1193+
1194+
return 0;
1195+
}
1196+
11261197
static int write_graph_chunk_extra_edges(struct hashfile *f,
11271198
struct write_commit_graph_context *ctx)
11281199
{
@@ -1386,6 +1457,9 @@ static void compute_generation_numbers(struct write_commit_graph_context *ctx)
13861457
if (current->date && current->date > max_corrected_commit_date)
13871458
max_corrected_commit_date = current->date - 1;
13881459
commit_graph_data_at(current)->generation = max_corrected_commit_date + 1;
1460+
1461+
if (commit_graph_data_at(current)->generation - current->date > GENERATION_NUMBER_V2_OFFSET_MAX)
1462+
ctx->num_generation_data_overflows++;
13891463
}
13901464
}
13911465
}
@@ -1719,6 +1793,21 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
17191793
chunks[2].id = GRAPH_CHUNKID_DATA;
17201794
chunks[2].size = (hashsz + 16) * ctx->commits.nr;
17211795
chunks[2].write_fn = write_graph_chunk_data;
1796+
1797+
if (git_env_bool(GIT_TEST_COMMIT_GRAPH_NO_GDAT, 0))
1798+
ctx->write_generation_data = 0;
1799+
if (ctx->write_generation_data) {
1800+
chunks[num_chunks].id = GRAPH_CHUNKID_GENERATION_DATA;
1801+
chunks[num_chunks].size = sizeof(uint32_t) * ctx->commits.nr;
1802+
chunks[num_chunks].write_fn = write_graph_chunk_generation_data;
1803+
num_chunks++;
1804+
}
1805+
if (ctx->num_generation_data_overflows) {
1806+
chunks[num_chunks].id = GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW;
1807+
chunks[num_chunks].size = sizeof(timestamp_t) * ctx->num_generation_data_overflows;
1808+
chunks[num_chunks].write_fn = write_graph_chunk_generation_data_overflow;
1809+
num_chunks++;
1810+
}
17221811
if (ctx->num_extra_edges) {
17231812
chunks[num_chunks].id = GRAPH_CHUNKID_EXTRAEDGES;
17241813
chunks[num_chunks].size = 4 * ctx->num_extra_edges;
@@ -2139,6 +2228,8 @@ int write_commit_graph(struct object_directory *odb,
21392228
ctx->split = flags & COMMIT_GRAPH_WRITE_SPLIT ? 1 : 0;
21402229
ctx->opts = opts;
21412230
ctx->total_bloom_filter_data_size = 0;
2231+
ctx->write_generation_data = 1;
2232+
ctx->num_generation_data_overflows = 0;
21422233

21432234
bloom_settings.bits_per_entry = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_BITS_PER_ENTRY",
21442235
bloom_settings.bits_per_entry);
@@ -2445,16 +2536,17 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags)
24452536
continue;
24462537

24472538
/*
2448-
* If one of our parents has generation GENERATION_NUMBER_V1_MAX, then
2449-
* our generation is also GENERATION_NUMBER_V1_MAX. Decrement to avoid
2450-
* extra logic in the following condition.
2539+
* If we are using topological level and one of our parents has
2540+
* generation GENERATION_NUMBER_V1_MAX, then our generation is
2541+
* also GENERATION_NUMBER_V1_MAX. Decrement to avoid extra logic
2542+
* in the following condition.
24512543
*/
2452-
if (max_generation == GENERATION_NUMBER_V1_MAX)
2544+
if (!g->chunk_generation_data && max_generation == GENERATION_NUMBER_V1_MAX)
24532545
max_generation--;
24542546

24552547
generation = commit_graph_generation(graph_commit);
2456-
if (generation != max_generation + 1)
2457-
graph_report(_("commit-graph generation for commit %s is %"PRItime" != %"PRItime),
2548+
if (generation < max_generation + 1)
2549+
graph_report(_("commit-graph generation for commit %s is %"PRItime" < %"PRItime),
24582550
oid_to_hex(&cur_oid),
24592551
generation,
24602552
max_generation + 1);

commit-graph.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include "oidset.h"
77

88
#define GIT_TEST_COMMIT_GRAPH "GIT_TEST_COMMIT_GRAPH"
9+
#define GIT_TEST_COMMIT_GRAPH_NO_GDAT "GIT_TEST_COMMIT_GRAPH_NO_GDAT"
910
#define GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE "GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE"
1011
#define GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS "GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS"
1112

@@ -68,6 +69,8 @@ struct commit_graph {
6869
const uint32_t *chunk_oid_fanout;
6970
const unsigned char *chunk_oid_lookup;
7071
const unsigned char *chunk_commit_data;
72+
const unsigned char *chunk_generation_data;
73+
const unsigned char *chunk_generation_data_overflow;
7174
const unsigned char *chunk_extra_edges;
7275
const unsigned char *chunk_base_graphs;
7376
const unsigned char *chunk_bloom_indexes;

commit.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#define GENERATION_NUMBER_INFINITY ((1ULL << 63) - 1)
1515
#define GENERATION_NUMBER_V1_MAX 0x3FFFFFFF
1616
#define GENERATION_NUMBER_ZERO 0
17+
#define GENERATION_NUMBER_V2_OFFSET_MAX ((1ULL << 31) - 1)
1718

1819
struct commit_list {
1920
struct commit *item;

t/README

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -393,6 +393,9 @@ GIT_TEST_COMMIT_GRAPH=<boolean>, when true, forces the commit-graph to
393393
be written after every 'git commit' command, and overrides the
394394
'core.commitGraph' setting to true.
395395

396+
GIT_TEST_COMMIT_GRAPH_NO_GDAT=<boolean>, when true, forces the
397+
commit-graph to be written without generation data chunk.
398+
396399
GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=<boolean>, when true, forces
397400
commit-graph write to compute and write changed path Bloom filters for
398401
every 'git commit-graph write', as if the `--changed-paths` option was

t/helper/test-read-graph.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,10 @@ int cmd__read_graph(int argc, const char **argv)
3333
printf(" oid_lookup");
3434
if (graph->chunk_commit_data)
3535
printf(" commit_metadata");
36+
if (graph->chunk_generation_data)
37+
printf(" generation_data");
38+
if (graph->chunk_generation_data_overflow)
39+
printf(" generation_data_overflow");
3640
if (graph->chunk_extra_edges)
3741
printf(" extra_edges");
3842
if (graph->chunk_bloom_indexes)

t/t4216-log-bloom.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,11 @@ test_expect_success 'setup test - repo, commits, commit graph, log outputs' '
4040
'
4141

4242
graph_read_expect () {
43-
NUM_CHUNKS=5
43+
NUM_CHUNKS=6
4444
cat >expect <<- EOF
4545
header: 43475048 1 $(test_oid oid_version) $NUM_CHUNKS 0
4646
num_commits: $1
47-
chunks: oid_fanout oid_lookup commit_metadata bloom_indexes bloom_data
47+
chunks: oid_fanout oid_lookup commit_metadata generation_data bloom_indexes bloom_data
4848
EOF
4949
test-tool read-graph >actual &&
5050
test_cmp expect actual

0 commit comments

Comments
 (0)