Skip to content

Commit a0a2d75

Browse files
committed
Merge branch 'ds/cache-tree-basics'
Document, clean up, and optimize the code around the cache-tree extension in the index.

* ds/cache-tree-basics:
  - cache-tree: speed up consecutive path comparisons
  - cache-tree: use ce_namelen() instead of strlen()
  - index-format: discuss recursion of cache-tree better
  - index-format: update preamble to cache tree extension
  - index-format: use 'cache tree' over 'cached tree'
  - cache-tree: trace regions for prime_cache_tree
  - cache-tree: trace regions for I/O
  - cache-tree: use trace2 in cache_tree_update()
  - unpack-trees: add trace2 regions
  - tree-walk: report recursion counts
2 parents b65b9ff + a4b6d20 commit a0a2d75

File tree

5 files changed

+94
-18
lines changed

5 files changed

+94
-18
lines changed

Documentation/technical/index-format.txt

Lines changed: 32 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ Git index format
2626
Extensions are identified by signature. Optional extensions can
2727
be ignored if Git does not understand them.
2828

29-
Git currently supports cached tree and resolve undo extensions.
29+
Git currently supports cache tree and resolve undo extensions.
3030

3131
4-byte extension signature. If the first byte is 'A'..'Z' the
3232
extension is optional and can be ignored.
@@ -136,14 +136,35 @@ Git index format
136136

137137
== Extensions
138138

139-
=== Cached tree
140-
141-
Cached tree extension contains pre-computed hashes for trees that can
142-
be derived from the index. It helps speed up tree object generation
143-
from index for a new commit.
144-
145-
When a path is updated in index, the path must be invalidated and
146-
removed from tree cache.
139+
=== Cache tree
140+
141+
Since the index does not record entries for directories, the cache
142+
entries cannot describe tree objects that already exist in the object
143+
database for regions of the index that are unchanged from an existing
144+
commit. The cache tree extension stores a recursive tree structure that
145+
describes the trees that already exist and completely match sections of
146+
the cache entries. This speeds up tree object generation from the index
147+
for a new commit by only computing the trees that are "new" to that
148+
commit. It also assists when comparing the index to another tree, such
149+
as `HEAD^{tree}`, since sections of the index can be skipped when a tree
150+
comparison demonstrates equality.
151+
152+
The recursive tree structure uses nodes that store a number of cache
153+
entries, a list of subnodes, and an object ID (OID). The OID references
154+
the existing tree for that node, if it is known to exist. The subnodes
155+
correspond to subdirectories that themselves have cache tree nodes. The
156+
number of cache entries corresponds to the number of cache entries in
157+
the index that describe paths within that tree's directory.
158+
159+
The cache tree extension tracks the full directory structure, but this
160+
is generally smaller than the full cache entry list.
161+
162+
When a path is updated in the index, Git invalidates all nodes of the
163+
recursive cache tree corresponding to the parent directories of that
164+
path. We store these tree nodes as being "invalid" by using "-1" as the
165+
number of cache entries. Invalid nodes still store a span of index
166+
entries, allowing Git to focus its efforts when reconstructing a full
167+
cache tree.
147168

148169
The signature for this extension is { 'T', 'R', 'E', 'E' }.
149170

@@ -174,7 +195,8 @@ Git index format
174195
first entry represents the root level of the repository, followed by the
175196
first subtree--let's call this A--of the root level (with its name
176197
relative to the root level), followed by the first subtree of A (with
177-
its name relative to A), ...
198+
its name relative to A), and so on. The specified number of subtrees
199+
indicates when the current level of the recursive stack is complete.
178200

179201
=== Resolve undo
180202

cache-tree.c

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -185,12 +185,14 @@ static int verify_cache(struct cache_entry **cache,
185185
* the cache is sorted. Also path can appear only once,
186186
* which means conflicting one would immediately follow.
187187
*/
188-
const char *this_name = cache[i]->name;
189-
const char *next_name = cache[i+1]->name;
190-
int this_len = strlen(this_name);
191-
if (this_len < strlen(next_name) &&
192-
strncmp(this_name, next_name, this_len) == 0 &&
193-
next_name[this_len] == '/') {
188+
const struct cache_entry *this_ce = cache[i];
189+
const struct cache_entry *next_ce = cache[i + 1];
190+
const char *this_name = this_ce->name;
191+
const char *next_name = next_ce->name;
192+
int this_len = ce_namelen(this_ce);
193+
if (this_len < ce_namelen(next_ce) &&
194+
next_name[this_len] == '/' &&
195+
strncmp(this_name, next_name, this_len) == 0) {
194196
if (10 < ++funny) {
195197
fprintf(stderr, "...\n");
196198
break;
@@ -442,7 +444,9 @@ int cache_tree_update(struct index_state *istate, int flags)
442444
if (i)
443445
return i;
444446
trace_performance_enter();
447+
trace2_region_enter("cache_tree", "update", the_repository);
445448
i = update_one(it, cache, entries, "", 0, &skip, flags);
449+
trace2_region_leave("cache_tree", "update", the_repository);
446450
trace_performance_leave("cache_tree_update");
447451
if (i < 0)
448452
return i;
@@ -492,7 +496,9 @@ static void write_one(struct strbuf *buffer, struct cache_tree *it,
492496

493497
void cache_tree_write(struct strbuf *sb, struct cache_tree *root)
494498
{
499+
trace2_region_enter("cache_tree", "write", the_repository);
495500
write_one(sb, root, "", 0);
501+
trace2_region_leave("cache_tree", "write", the_repository);
496502
}
497503

498504
static struct cache_tree *read_one(const char **buffer, unsigned long *size_p)
@@ -581,9 +587,16 @@ static struct cache_tree *read_one(const char **buffer, unsigned long *size_p)
581587

582588
struct cache_tree *cache_tree_read(const char *buffer, unsigned long size)
583589
{
590+
struct cache_tree *result;
591+
584592
if (buffer[0])
585593
return NULL; /* not the whole tree */
586-
return read_one(&buffer, &size);
594+
595+
trace2_region_enter("cache_tree", "read", the_repository);
596+
result = read_one(&buffer, &size);
597+
trace2_region_leave("cache_tree", "read", the_repository);
598+
599+
return result;
587600
}
588601

589602
static struct cache_tree *cache_tree_find(struct cache_tree *it, const char *path)
@@ -733,10 +746,13 @@ void prime_cache_tree(struct repository *r,
733746
struct index_state *istate,
734747
struct tree *tree)
735748
{
749+
trace2_region_enter("cache-tree", "prime_cache_tree", the_repository);
736750
cache_tree_free(&istate->cache_tree);
737751
istate->cache_tree = cache_tree();
752+
738753
prime_cache_tree_rec(r, istate->cache_tree, tree);
739754
istate->cache_changed |= CACHE_TREE_CHANGED;
755+
trace2_region_leave("cache-tree", "prime_cache_tree", the_repository);
740756
}
741757

742758
/*

t/t7104-reset-hard.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ test_expect_success 'reset --hard should restore unmerged ones' '
3333
3434
'
3535

36-
test_expect_success 'reset --hard did not corrupt index or cached-tree' '
36+
test_expect_success 'reset --hard did not corrupt index or cache-tree' '
3737
3838
T=$(git write-tree) &&
3939
rm -f .git/index &&

tree-walk.c

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include "object-store.h"
55
#include "tree.h"
66
#include "pathspec.h"
7+
#include "json-writer.h"
78

89
static const char *get_mode(const char *str, unsigned int *modep)
910
{
@@ -167,6 +168,25 @@ int tree_entry_gently(struct tree_desc *desc, struct name_entry *entry)
167168
return 1;
168169
}
169170

171+
static int traverse_trees_atexit_registered;
172+
static int traverse_trees_count;
173+
static int traverse_trees_cur_depth;
174+
static int traverse_trees_max_depth;
175+
176+
static void trace2_traverse_trees_statistics_atexit(void)
177+
{
178+
struct json_writer jw = JSON_WRITER_INIT;
179+
180+
jw_object_begin(&jw, 0);
181+
jw_object_intmax(&jw, "traverse_trees_count", traverse_trees_count);
182+
jw_object_intmax(&jw, "traverse_trees_max_depth", traverse_trees_max_depth);
183+
jw_end(&jw);
184+
185+
trace2_data_json("traverse_trees", the_repository, "statistics", &jw);
186+
187+
jw_release(&jw);
188+
}
189+
170190
void setup_traverse_info(struct traverse_info *info, const char *base)
171191
{
172192
size_t pathlen = strlen(base);
@@ -180,6 +200,11 @@ void setup_traverse_info(struct traverse_info *info, const char *base)
180200
info->namelen = pathlen;
181201
if (pathlen)
182202
info->prev = &dummy;
203+
204+
if (trace2_is_enabled() && !traverse_trees_atexit_registered) {
205+
atexit(trace2_traverse_trees_statistics_atexit);
206+
traverse_trees_atexit_registered = 1;
207+
}
183208
}
184209

185210
char *make_traverse_path(char *path, size_t pathlen,
@@ -416,6 +441,12 @@ int traverse_trees(struct index_state *istate,
416441
int interesting = 1;
417442
char *traverse_path;
418443

444+
traverse_trees_count++;
445+
traverse_trees_cur_depth++;
446+
447+
if (traverse_trees_cur_depth > traverse_trees_max_depth)
448+
traverse_trees_max_depth = traverse_trees_cur_depth;
449+
419450
if (n >= ARRAY_SIZE(entry))
420451
BUG("traverse_trees() called with too many trees (%d)", n);
421452

@@ -515,6 +546,8 @@ int traverse_trees(struct index_state *istate,
515546
free(traverse_path);
516547
info->traverse_path = NULL;
517548
strbuf_release(&base);
549+
550+
traverse_trees_cur_depth--;
518551
return error;
519552
}
520553

unpack-trees.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1580,6 +1580,8 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options
15801580
die("unpack_trees takes at most %d trees", MAX_UNPACK_TREES);
15811581

15821582
trace_performance_enter();
1583+
trace2_region_enter("unpack_trees", "unpack_trees", the_repository);
1584+
15831585
if (!core_apply_sparse_checkout || !o->update)
15841586
o->skip_sparse_checkout = 1;
15851587
if (!o->skip_sparse_checkout && !o->pl) {
@@ -1653,7 +1655,9 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options
16531655
}
16541656

16551657
trace_performance_enter();
1658+
trace2_region_enter("unpack_trees", "traverse_trees", the_repository);
16561659
ret = traverse_trees(o->src_index, len, t, &info);
1660+
trace2_region_leave("unpack_trees", "traverse_trees", the_repository);
16571661
trace_performance_leave("traverse_trees");
16581662
if (ret < 0)
16591663
goto return_failed;
@@ -1741,6 +1745,7 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options
17411745
done:
17421746
if (free_pattern_list)
17431747
clear_pattern_list(&pl);
1748+
trace2_region_leave("unpack_trees", "unpack_trees", the_repository);
17441749
trace_performance_leave("unpack_trees");
17451750
return ret;
17461751

0 commit comments

Comments
 (0)